From 5ec28bd2259a26a676c6688399e11da2daa039a2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 4 Aug 2024 21:15:48 -0800 Subject: [PATCH 001/589] adding in pytorch inference with some gen and custom code, added in install for powershell in windows env, base pytorch version off of tinygrid --- exo/inference/pytorch/index.ex.json | 3 + exo/inference/pytorch/inference.py | 221 ++++++++++++++++++++++++++ exo/inference/pytorch/models/llama.py | 0 install.ps1 | 8 + setup.py | 1 + 5 files changed, 233 insertions(+) create mode 100644 exo/inference/pytorch/index.ex.json create mode 100644 exo/inference/pytorch/inference.py create mode 100644 exo/inference/pytorch/models/llama.py create mode 100644 install.ps1 diff --git a/exo/inference/pytorch/index.ex.json b/exo/inference/pytorch/index.ex.json new file mode 100644 index 000000000..19b16d916 --- /dev/null +++ b/exo/inference/pytorch/index.ex.json @@ -0,0 +1,3 @@ +{ + "model": "huggingface_model_name" +} \ No newline at end of file diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py new file mode 100644 index 000000000..d8ae7289d --- /dev/null +++ b/exo/inference/pytorch/inference.py @@ -0,0 +1,221 @@ +# experimental, based off of tinygrad/inference.py + +import asyncio +from functools import partial +from pathlib import Path +from typing import List, Optional, Union, Callable, Dict +import json +import torch +from torch import nn +from transformers import AutoTokenizer, AutoModelForCausalLM +import numpy as np +import os + +MODEL_PARAMS = { + "8B": { + "args": { + "dim": 4096, + "n_heads": 32, + "n_kv_heads": 8, + "n_layers": 32, + "norm_eps": 1e-5, + "rope_theta": 500000, + "vocab_size": 128256, + "hidden_dim": 14336, + }, + "files": 1, + }, + "70B": { + "args": { + "dim": 8192, + "n_heads": 64, + "n_kv_heads": 8, + "n_layers": 80, + "norm_eps": 1e-5, + "rope_theta": 500000, + "vocab_size": 128256, + "hidden_dim": 28672, + }, + "files": 8, + }, +} + + +# **** helper functions **** +def load(fn: str) -> Union[str, Dict[str, torch.Tensor]]: + model = "" + if fn.endswith(".index.json"): + with open(fn) as fp: + model = json.load(fp)["model"] + + if model == "": + model = torch.load(fn, map_location="cpu") + + return model + +def build_transformer(model_path: Union[str, Path], model_size="8B", quantize=None, device=None): + # Load the model configuration and parameters + model = load(model_path) + if isinstance(model, str): + with torch.device(device): + model = AutoModelForCausalLM.from_pretrained( + model, + torch_dtype=torch.float16 if "cuda" in str(device) else torch.float32, + device_map="auto" if "cuda" in str(device) else None + ) + + # Quantize the model if specified + if quantize: + model = torch.quantization.quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8) + + # Shard the model if using multiple devices + if isinstance(device, tuple): + for name, param in model.named_parameters(): + if "scale" in name: + param.data = param.data.chunk(len(device), dim=0) + elif ".attention." in name: + param.data = param.data.chunk(len(device), dim=-1) + elif ".feed_forward.w1." in name or ".feed_forward.w3." in name: + param.data = param.data.chunk(len(device), dim=0) + elif ".feed_forward." 
in name: + param.data = param.data.chunk(len(device), dim=-1) + elif "tok_embeddings.weight" in name or "output.weight" in name: + param.data = param.data.chunk(len(device), dim=0) + + return model + +# Sample function using the built transformer +def sample(logits: torch.Tensor, temp: float, k: int, p: float): + if temp < 1e-6: + return torch.argmax(logits) + + logits[torch.isnan(logits)] = -float("inf") + probs = torch.nn.functional.softmax(logits / temp, dim=-1) + + if k: + top_probs, top_indices = torch.topk(probs, k) + top_probs = top_probs[top_probs.cumsum(dim=-1) <= p] + top_indices = top_indices[:len(top_probs)] + sampled_idx = torch.multinomial(top_probs, 1) + return top_indices[sampled_idx] + + return torch.multinomial(probs, 1) + + +# default settings +TEMPERATURE = 0 # 0.85 +TOP_K = 25 +TOP_P = 0.9 +ALPHA_F = 0.1 +ALPHA_P = 0.0 + + +def prefill(model, toks, start_pos=0): + # prefill the model + for tok in tqdm(toks): + GlobalCounters.reset() + inputs = torch.tensor([[tok]], device=model.device) + model.generate(inputs, do_sample=True, temperature=TEMPERATURE, top_k=TOP_K, top_p=TOP_P, max_new_tokens=1) + start_pos += 1 + return start_pos + + +class PytorchDynamicShardInferenceEngine(InferenceEngine): + def __init__(self): + self.shard = None + + async def infer_prompt(self, request_id: str, shard: Shard, prompt: str, image_str: Optional[str] = None, inference_state: Optional[str] = None) -> (np.ndarray, str, bool): + await self.ensure_shard(shard) + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + + toks = self.tokenizer.encode(prompt) + start_pos = prefill(self.model, toks[:-1], start_pos=start_pos) + last_tok = toks[-1] + + input_ids = torch.tensor([[last_tok]], device=self.model.device) + output_data = self.model.generate(input_ids, do_sample=True, temperature=TEMPERATURE, top_k=TOP_K, top_p=TOP_P, max_new_tokens=1).cpu().numpy() + if output_data.size == 1: + start_pos += 1 + + return ( + output_data, + json.dumps({"start_pos": start_pos}), + output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], + ) + + async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray, inference_state: Optional[str] = None) -> (np.ndarray, str, bool): + await self.ensure_shard(shard) + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + + input_ids = torch.tensor(input_data, device=self.model.device) + output_data = self.model.generate(input_ids, do_sample=True, temperature=TEMPERATURE, top_k=TOP_K, top_p=TOP_P, max_new_tokens=1).cpu().numpy() + if output_data.size == 1: + start_pos += 1 + + return ( + output_data, + json.dumps({"start_pos": start_pos}), + output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], + ) + + async def ensure_shard(self, shard: Shard): + if self.shard == shard: + return + + model_path = Path(shard.model_id) + models_dir = Path(_cache_dir) / "pytorch" / "downloads" + model_path = models_dir / shard.model_id + size = "8B" + if Path(model_path / "tokenizer_config.json").exists(): + model = model_path + else: + if DEBUG >= 2: + print(f"Downloading pytorch model {shard.model_id}...") + if shard.model_id.lower().find("llama3-8b-sfr") != -1: + num_files = 4 + for i in range(num_files): + await fetch_async( + f"https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/model-{(i+1):05d}-of-{num_files:05d}.bin", + f"model-{(i+1):05d}-of-{num_files:05d}.bin", + subdir=shard.model_id, + ) + await fetch_async( + 
"https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/config.json", + "config.json", + subdir=shard.model_id, + ) + model = await fetch_async( + "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/raw/main/model.index.json", + "model.index.json", + subdir=shard.model_id, + ) + await fetch_async( + "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/special_tokens_map.json", + "special_tokens_map.json", + subdir=shard.model_id, + ) + await fetch_async( + "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/tokenizer.json", + "tokenizer.json", + subdir=shard.model_id, + ) + await fetch_async( + "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/tokenizer_config.json", + "tokenizer_config.json", + subdir=shard.model_id, + ) + size = "8B" + elif shard.model_id.lower().find("llama3-70b-sfr") != -1: + raise NotImplementedError("llama3-70b-sfr is not implemented for pytorch") + else: + raise ValueError(f"pytorch doesnt currently support arbitrary model downloading. unsupported model: {shard.model_id}") + + model = build_transformer(model_path, shard=shard, model_size=size) + tokenizer = AutoTokenizer.from_pretrained(model_path if model_path.is_dir() else model_path.parent) + + self.shard = shard + self.model = model + self.tokenizer = tokenizer + + def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): + pass diff --git a/exo/inference/pytorch/models/llama.py b/exo/inference/pytorch/models/llama.py new file mode 100644 index 000000000..e69de29bb diff --git a/install.ps1 b/install.ps1 new file mode 100644 index 000000000..c766cdd5b --- /dev/null +++ b/install.ps1 @@ -0,0 +1,8 @@ +# Create a virtual environment +python3 -m venv .venv + +# Activate the virtual environment +& .\.venv\Scripts\Activate.ps1 + +# Install the package in the virtual environment +pip install . 
diff --git a/setup.py b/setup.py index 77641a2b2..892548b1c 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ "transformers==4.43.3", "uuid==1.30", "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@639af3f823cf242a1945dc24183e52a9df0af2b7", + "torch==2.4.0" ] # Add macOS-specific packages if on Darwin (macOS) From 7fcc89decaca669da8e1af39a0ded94140aaf6d7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 4 Aug 2024 21:57:29 -0800 Subject: [PATCH 002/589] removing and rewriting pytorch inference, removing model file as using huggingface hub, adding in pytorch_model.bin.index.json loading to load method --- exo/inference/pytorch/index.ex.json | 3 - exo/inference/pytorch/inference.py | 166 ++++---------------------- exo/inference/pytorch/models/llama.py | 0 3 files changed, 23 insertions(+), 146 deletions(-) delete mode 100644 exo/inference/pytorch/index.ex.json delete mode 100644 exo/inference/pytorch/models/llama.py diff --git a/exo/inference/pytorch/index.ex.json b/exo/inference/pytorch/index.ex.json deleted file mode 100644 index 19b16d916..000000000 --- a/exo/inference/pytorch/index.ex.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "model": "huggingface_model_name" -} \ No newline at end of file diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index d8ae7289d..6d6a306d1 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -42,27 +42,35 @@ # **** helper functions **** +def concat_weights(models, device=None): + def convert(name) -> torch.Tensor: + disk_tensors: List[torch.Tensor] = [model[name] for model in models] + if len(disk_tensors) == 1 or len(disk_tensors[0].shape) == 1: + return disk_tensors[0].to(device=device) + axis = 1 if name.endswith(".attention.wo.weight") or name.endswith(".feed_forward.w2.weight") else 0 + lazy_tensors = [data.to(device=device) for data in disk_tensors] + return torch.cat(lazy_tensors, dim=axis) + + return {name: convert(name) for name in {name for model in models for name in model}} + def load(fn: str) -> Union[str, Dict[str, torch.Tensor]]: model = "" - if fn.endswith(".index.json"): + if fn.endswith("pytorch_model.bin.index.json"): with open(fn) as fp: - model = json.load(fp)["model"] - - if model == "": + weight_map = json.load(fp)["weight_map"] + parts = {n: torch.load(str(Path(fn).parent / Path(n).name), map_location="cpu") for n in set(weight_map.values())} + return {k: parts[n][k] for k, n in weight_map.items()} + else: model = torch.load(fn, map_location="cpu") - return model -def build_transformer(model_path: Union[str, Path], model_size="8B", quantize=None, device=None): - # Load the model configuration and parameters - model = load(model_path) - if isinstance(model, str): - with torch.device(device): - model = AutoModelForCausalLM.from_pretrained( - model, - torch_dtype=torch.float16 if "cuda" in str(device) else torch.float32, - device_map="auto" if "cuda" in str(device) else None - ) +def build_transformer(model_name: str, model_size="8B", quantize=None, device=None): + with torch.device(device): + model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float16 if "cuda" in str(device) else torch.float32, + device_map="auto" if "cuda" in str(device) else None + ) # Quantize the model if specified if quantize: @@ -84,24 +92,6 @@ def build_transformer(model_path: Union[str, Path], model_size="8B", quantize=No return model -# Sample function using the built transformer -def sample(logits: torch.Tensor, temp: float, k: int, p: float): - if 
temp < 1e-6: - return torch.argmax(logits) - - logits[torch.isnan(logits)] = -float("inf") - probs = torch.nn.functional.softmax(logits / temp, dim=-1) - - if k: - top_probs, top_indices = torch.topk(probs, k) - top_probs = top_probs[top_probs.cumsum(dim=-1) <= p] - top_indices = top_indices[:len(top_probs)] - sampled_idx = torch.multinomial(top_probs, 1) - return top_indices[sampled_idx] - - return torch.multinomial(probs, 1) - - # default settings TEMPERATURE = 0 # 0.85 TOP_K = 25 @@ -109,113 +99,3 @@ def sample(logits: torch.Tensor, temp: float, k: int, p: float): ALPHA_F = 0.1 ALPHA_P = 0.0 - -def prefill(model, toks, start_pos=0): - # prefill the model - for tok in tqdm(toks): - GlobalCounters.reset() - inputs = torch.tensor([[tok]], device=model.device) - model.generate(inputs, do_sample=True, temperature=TEMPERATURE, top_k=TOP_K, top_p=TOP_P, max_new_tokens=1) - start_pos += 1 - return start_pos - - -class PytorchDynamicShardInferenceEngine(InferenceEngine): - def __init__(self): - self.shard = None - - async def infer_prompt(self, request_id: str, shard: Shard, prompt: str, image_str: Optional[str] = None, inference_state: Optional[str] = None) -> (np.ndarray, str, bool): - await self.ensure_shard(shard) - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - - toks = self.tokenizer.encode(prompt) - start_pos = prefill(self.model, toks[:-1], start_pos=start_pos) - last_tok = toks[-1] - - input_ids = torch.tensor([[last_tok]], device=self.model.device) - output_data = self.model.generate(input_ids, do_sample=True, temperature=TEMPERATURE, top_k=TOP_K, top_p=TOP_P, max_new_tokens=1).cpu().numpy() - if output_data.size == 1: - start_pos += 1 - - return ( - output_data, - json.dumps({"start_pos": start_pos}), - output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], - ) - - async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray, inference_state: Optional[str] = None) -> (np.ndarray, str, bool): - await self.ensure_shard(shard) - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - - input_ids = torch.tensor(input_data, device=self.model.device) - output_data = self.model.generate(input_ids, do_sample=True, temperature=TEMPERATURE, top_k=TOP_K, top_p=TOP_P, max_new_tokens=1).cpu().numpy() - if output_data.size == 1: - start_pos += 1 - - return ( - output_data, - json.dumps({"start_pos": start_pos}), - output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], - ) - - async def ensure_shard(self, shard: Shard): - if self.shard == shard: - return - - model_path = Path(shard.model_id) - models_dir = Path(_cache_dir) / "pytorch" / "downloads" - model_path = models_dir / shard.model_id - size = "8B" - if Path(model_path / "tokenizer_config.json").exists(): - model = model_path - else: - if DEBUG >= 2: - print(f"Downloading pytorch model {shard.model_id}...") - if shard.model_id.lower().find("llama3-8b-sfr") != -1: - num_files = 4 - for i in range(num_files): - await fetch_async( - f"https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/model-{(i+1):05d}-of-{num_files:05d}.bin", - f"model-{(i+1):05d}-of-{num_files:05d}.bin", - subdir=shard.model_id, - ) - await fetch_async( - "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/config.json", - "config.json", - subdir=shard.model_id, - ) - model = await fetch_async( - "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/raw/main/model.index.json", - 
"model.index.json", - subdir=shard.model_id, - ) - await fetch_async( - "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/special_tokens_map.json", - "special_tokens_map.json", - subdir=shard.model_id, - ) - await fetch_async( - "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/tokenizer.json", - "tokenizer.json", - subdir=shard.model_id, - ) - await fetch_async( - "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/tokenizer_config.json", - "tokenizer_config.json", - subdir=shard.model_id, - ) - size = "8B" - elif shard.model_id.lower().find("llama3-70b-sfr") != -1: - raise NotImplementedError("llama3-70b-sfr is not implemented for pytorch") - else: - raise ValueError(f"pytorch doesnt currently support arbitrary model downloading. unsupported model: {shard.model_id}") - - model = build_transformer(model_path, shard=shard, model_size=size) - tokenizer = AutoTokenizer.from_pretrained(model_path if model_path.is_dir() else model_path.parent) - - self.shard = shard - self.model = model - self.tokenizer = tokenizer - - def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): - pass diff --git a/exo/inference/pytorch/models/llama.py b/exo/inference/pytorch/models/llama.py deleted file mode 100644 index e69de29bb..000000000 From 54b330648e549306a8255d7e5716455c04b1ec66 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 4 Aug 2024 22:46:51 -0800 Subject: [PATCH 003/589] separating out helpers, building unittest for build_transformer helper, gen pydoc --- exo/inference/pytorch/helpers.py | 231 ++++++++++++++++++ exo/inference/pytorch/inference.py | 109 ++------- .../pytorch/test_build_transformer.py | 46 ++++ 3 files changed, 299 insertions(+), 87 deletions(-) create mode 100644 exo/inference/pytorch/helpers.py create mode 100644 exo/inference/pytorch/test_build_transformer.py diff --git a/exo/inference/pytorch/helpers.py b/exo/inference/pytorch/helpers.py new file mode 100644 index 000000000..8376ea26e --- /dev/null +++ b/exo/inference/pytorch/helpers.py @@ -0,0 +1,231 @@ +# Helper functions for pytorch inference +# Some code coming from tinygrad but written towards pytorch + +# import os +# import numpy as np +# import asyncio +import json +import torch +# from functools import partial +from pathlib import Path +from typing import List, Union, Dict, Any +from transformers import AutoModelForCausalLM +from exo.inference.shard import Shard +# from exo.inference.inference_engine import InferenceEngine + +MODEL_PARAMS = { + "8B": { + "args": { + "dim": 4096, + "n_heads": 32, + "n_kv_heads": 8, + "n_layers": 32, + "norm_eps": 1e-5, + "rope_theta": 500000, + "vocab_size": 128256, + "hidden_dim": 14336, + }, + "files": 1, + }, + "70B": { + "args": { + "dim": 8192, + "n_heads": 64, + "n_kv_heads": 8, + "n_layers": 80, + "norm_eps": 1e-5, + "rope_theta": 500000, + "vocab_size": 128256, + "hidden_dim": 28672, + }, + "files": 8, + }, +} + +def concat_weights(models, device=None): + """ + Concatenates weights from multiple model parts along the appropriate axis. + + Args: + models (List[Dict[str, torch.Tensor]]): List of dictionaries containing model weights. + device (Optional[torch.device]): The device to move the weights to (e.g., 'cpu' or 'cuda'). + + Returns: + Dict[str, torch.Tensor]: A dictionary where the keys are the weight names and the values + are the concatenated tensors moved to the specified device. 
+ """ + def convert(name) -> torch.Tensor: + disk_tensors: List[torch.Tensor] = [model[name] for model in models] + if len(disk_tensors) == 1 or len(disk_tensors[0].shape) == 1: + return disk_tensors[0].to(device=device) + + ewn = name.endswith(".attention.wo.weight") or name.endswith(".feed_forward.w2.weight") + axis = 1 if ewn else 0 + + lazy_tensors = [data.to(device=device) for data in disk_tensors] + return torch.cat(lazy_tensors, dim=axis) + + return {name: convert(name) for name in {name for model in models for name in model}} + +def load_weights(fn: str) -> Union[str, Dict[str, torch.Tensor]]: + """ + Loads model weights from a specified file. Supports both individual model files and + index files that map to multiple weight files. + + Args: + fn (str): The file path to load weights from. + + Returns: + Union[str, Dict[str, torch.Tensor]]: A string representing the model or a + dictionary of model weights. + """ + model = "" + if fn.endswith("pytorch_model.bin.index.json"): + with open(fn) as fp: + weight_map = json.load(fp)["weight_map"] + + for n in set(weight_map.values()): + full_path = str(Path(fn).parent / Path(n).name) + parts = {n: torch.load(full_path, map_location="cpu")} + + return {k: parts[n][k] for k, n in weight_map.items()} + else: + model = torch.load(fn, map_location="cpu") + return model + +def convert_from_huggingface( + weights: Dict[str, torch.Tensor], + model: torch.nn.Module, + n_heads: int, + n_kv_heads: int, + shard: Shard) -> Dict[str, torch.Tensor]: + """ + Converts Hugging Face model weights to the format expected by the target model. + + Args: + weights (Dict[str, torch.Tensor]): Dictionary of Hugging Face model weights. + model (nn.Module): The target model. + n_heads (int): Number of attention heads. + n_kv_heads (int): Number of key-value heads. + shard (Shard): Shard object containing information about the model shard. + + Returns: + Dict[str, torch.Tensor]: Dictionary of converted weights. + """ + def permute(v: torch.Tensor, n_heads: int) -> torch.Tensor: + return v.view( + n_heads, + 2, + v.shape[0] // (2 * n_heads), + v.shape[1] + ).transpose(1, 2).reshape(*v.shape) + + keymap = { + "model.embed_tokens.weight": "tok_embeddings.weight", + **{f"model.layers.{l}.input_layernorm.weight": f"layers.{l}.attention_norm.weight" for l in range(len(model.layers))}, + **{f"model.layers.{l}.self_attn.{x}_proj.weight": f"layers.{l}.attention.w_{x}.weight" for x in ["q", "k", "v", "o"] for l in range(len(model.layers))}, + **{f"model.layers.{l}.post_attention_layernorm.weight": f"layers.{l}.ffn_norm.weight" for l in range(len(model.layers))}, + **{f"model.layers.{l}.mlp.{x}_proj.weight": f"layers.{l}.feed_forward.w_{y}.weight" for x, y in {"gate": "1", "down": "2", "up": "3"}.items() for l in range(len(model.layers))}, + "model.norm.weight": "norm.weight", + "lm_head.weight": "output.weight", + } + + sd = {} + for k, v in weights.items(): + if ".rotary_emb." in k: + continue + + if "model.layers" in k: + layer_num = int(k.split(".")[2]) + if shard.start_layer <= layer_num <= shard.end_layer: + k = f"model.layers.{layer_num - shard.start_layer}." + ".".join(k.split(".")[3:]) + else: + continue + + if "q_proj" in k: + v = permute(v, n_heads) + elif "k_proj" in k: + v = permute(v, n_kv_heads) + + if k in keymap: + sd[keymap[k]] = v + + return sd + +def fix_bf16(weights: Dict[Any, torch.Tensor]) -> Dict[Any, torch.Tensor]: + """ + Converts weights to bfloat16 if supported by the device, otherwise to float16. 
+ + Args: + weights (Dict[Any, torch.Tensor]): Dictionary of model weights. + + Returns: + Dict[Any, torch.Tensor]: Dictionary of converted weights. + """ + supports_bf16 = torch.cuda.is_bf16_supported() + + if supports_bf16: + return {k: v.to(torch.bfloat16) if v.dtype == torch.float32 else v for k, v in weights.items()} + else: + return {k: v.to(torch.float16) if v.dtype == torch.bfloat16 else v for k, v in weights.items()} + + +def build_transformer(model_name: str, shard: Shard, model_size="8B", quantize=None, device=None): + # Load model from Hugging Face hub + model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float16 if "cuda" in str(device) else torch.float32, + device_map="auto" if "cuda" in str(device) else None + ) + + # Load weights + model_path = Path(model_name) + if model_path.is_dir(): + if (model_path / "pytorch_model.bin.index.json").exists(): + weights = load_weights(str(model_path / "pytorch_model.bin.index.json")) + else: + pth_weights = [] + for i in range(MODEL_PARAMS[model_size]["files"]): + pth_path = str(model_path / f"consolidated.{i:02d}.pth") + pth_weights.append(load_weights(pth_path)) + + weights = concat_weights( + pth_weights, + device[0] if isinstance(device, tuple) else device, + ) + else: + weights = load_weights(str(model_path)) + + if "model.embed_tokens.weight" in weights: + weights = convert_from_huggingface( + weights, + model, + MODEL_PARAMS[model_size]["args"]["n_heads"], + MODEL_PARAMS[model_size]["args"]["n_kv_heads"], + shard=shard, + ) + weights = fix_bf16(weights) + + # Quantize the model if specified + if quantize: + model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}) + + # Shard the model if using multiple devices + if isinstance(device, tuple): + for name, param in model.named_parameters(): + if "scale" in name: + param.data = param.data.chunk(len(device), dim=0) + elif ".attention." in name: + param.data = param.data.chunk(len(device), dim=-1) + elif ".feed_forward.w1." in name or ".feed_forward.w3." in name: + param.data = param.data.chunk(len(device), dim=0) + elif ".feed_forward." 
in name: + param.data = param.data.chunk(len(device), dim=-1) + elif "tok_embeddings.weight" in name or "output.weight" in name: + param.data = param.data.chunk(len(device), dim=0) + + # Replace weights in model + model.load_state_dict(weights, strict=False) + + return model + diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 6d6a306d1..d411481d5 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,96 +1,16 @@ # experimental, based off of tinygrad/inference.py - +import os +import numpy as np import asyncio +import json +import torch from functools import partial from pathlib import Path from typing import List, Optional, Union, Callable, Dict -import json -import torch -from torch import nn from transformers import AutoTokenizer, AutoModelForCausalLM -import numpy as np -import os - -MODEL_PARAMS = { - "8B": { - "args": { - "dim": 4096, - "n_heads": 32, - "n_kv_heads": 8, - "n_layers": 32, - "norm_eps": 1e-5, - "rope_theta": 500000, - "vocab_size": 128256, - "hidden_dim": 14336, - }, - "files": 1, - }, - "70B": { - "args": { - "dim": 8192, - "n_heads": 64, - "n_kv_heads": 8, - "n_layers": 80, - "norm_eps": 1e-5, - "rope_theta": 500000, - "vocab_size": 128256, - "hidden_dim": 28672, - }, - "files": 8, - }, -} - - -# **** helper functions **** -def concat_weights(models, device=None): - def convert(name) -> torch.Tensor: - disk_tensors: List[torch.Tensor] = [model[name] for model in models] - if len(disk_tensors) == 1 or len(disk_tensors[0].shape) == 1: - return disk_tensors[0].to(device=device) - axis = 1 if name.endswith(".attention.wo.weight") or name.endswith(".feed_forward.w2.weight") else 0 - lazy_tensors = [data.to(device=device) for data in disk_tensors] - return torch.cat(lazy_tensors, dim=axis) - - return {name: convert(name) for name in {name for model in models for name in model}} - -def load(fn: str) -> Union[str, Dict[str, torch.Tensor]]: - model = "" - if fn.endswith("pytorch_model.bin.index.json"): - with open(fn) as fp: - weight_map = json.load(fp)["weight_map"] - parts = {n: torch.load(str(Path(fn).parent / Path(n).name), map_location="cpu") for n in set(weight_map.values())} - return {k: parts[n][k] for k, n in weight_map.items()} - else: - model = torch.load(fn, map_location="cpu") - return model - -def build_transformer(model_name: str, model_size="8B", quantize=None, device=None): - with torch.device(device): - model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float16 if "cuda" in str(device) else torch.float32, - device_map="auto" if "cuda" in str(device) else None - ) - - # Quantize the model if specified - if quantize: - model = torch.quantization.quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8) - - # Shard the model if using multiple devices - if isinstance(device, tuple): - for name, param in model.named_parameters(): - if "scale" in name: - param.data = param.data.chunk(len(device), dim=0) - elif ".attention." in name: - param.data = param.data.chunk(len(device), dim=-1) - elif ".feed_forward.w1." in name or ".feed_forward.w3." in name: - param.data = param.data.chunk(len(device), dim=0) - elif ".feed_forward." 
in name: - param.data = param.data.chunk(len(device), dim=-1) - elif "tok_embeddings.weight" in name or "output.weight" in name: - param.data = param.data.chunk(len(device), dim=0) - - return model +from exo.inference.shard import Shard +from exo.inference.inference_engine import InferenceEngine +# from exo.inference.pytorch.helpers import # default settings TEMPERATURE = 0 # 0.85 @@ -99,3 +19,18 @@ def build_transformer(model_name: str, model_size="8B", quantize=None, device=No ALPHA_F = 0.1 ALPHA_P = 0.0 + +# don't think prefill is needed +# think that is used for stats but will look into + +class PyTorchDynamicShardInferenceEngine(InferenceEngine): + def __init__(self): + self.shard = None + + # async def infer_prompt + + # async def infer_tensor + + # async def ensure_shard + + # def set_on_download_progess [is this needed?] diff --git a/exo/inference/pytorch/test_build_transformer.py b/exo/inference/pytorch/test_build_transformer.py new file mode 100644 index 000000000..1d553f478 --- /dev/null +++ b/exo/inference/pytorch/test_build_transformer.py @@ -0,0 +1,46 @@ +import unittest +from unittest.mock import patch, MagicMock +from pathlib import Path +import torch +from transformers import AutoModelForCausalLM +from exo.inference.shard import Shard +from exo.inference.pytorch.helpers import build_transformer + +class TestBuildTransformer(unittest.TestCase): + + @patch('torch.load') + @patch('transformers.AutoModelForCausalLM.from_pretrained') + @patch('builtins.open', new_callable=unittest.mock.mock_open, read_data='{"weight_map": {"0": "pytorch_model.bin"}}') + def test_build_transformer(self, mock_open, mock_from_pretrained, mock_torch_load): + # Mocking model and weights + mock_model = MagicMock(spec=AutoModelForCausalLM) + mock_from_pretrained.return_value = mock_model + + mock_weights = { + "model.embed_tokens.weight": torch.randn(1024, 768), + "model.layers.0.self_attn.q_proj.weight": torch.randn(768, 768), + # Add other necessary mock weights here + } + mock_torch_load.return_value = mock_weights + + # Define the shard + shard = Shard(model_id="mock_model", start_layer=0, end_layer=0, n_layers=1) + + # Call the build_transformer function + model = build_transformer("mock_model", shard, model_size="8B", quantize=True, device="cpu") + + # Assertions to verify the function behavior + mock_from_pretrained.assert_called_once_with( + "mock_model", + torch_dtype=torch.float32, + device_map=None + ) + + mock_open.assert_called_once_with("mock_model/pytorch_model.bin.index.json") + mock_torch_load.assert_called() + + mock_model.load_state_dict.assert_called() + self.assertEqual(model, mock_model) + +if __name__ == '__main__': + unittest.main() From 207422ff26f3a65e7285e98a9a39d185860dac61 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 4 Aug 2024 23:00:10 -0800 Subject: [PATCH 004/589] cleaning up bad gen code test for convert, simplifying test, taking out loading weights as not needed --- exo/inference/pytorch/helpers.py | 49 ++++++------------- .../pytorch/test_build_transformer.py | 3 +- 2 files changed, 18 insertions(+), 34 deletions(-) diff --git a/exo/inference/pytorch/helpers.py b/exo/inference/pytorch/helpers.py index 8376ea26e..cb93adc0e 100644 --- a/exo/inference/pytorch/helpers.py +++ b/exo/inference/pytorch/helpers.py @@ -171,41 +171,27 @@ def fix_bf16(weights: Dict[Any, torch.Tensor]) -> Dict[Any, torch.Tensor]: def build_transformer(model_name: str, shard: Shard, model_size="8B", quantize=None, device=None): + """ + Builds a transformer model by loading it from the 
Hugging Face model hub and applying + weight conversion, quantization, and sharding as specified. + + Args: + model_name (str): The name of the model to load from the Hugging Face model hub. + shard (Shard): A Shard object containing information about the model shard. + model_size (str, optional): The size of the model to load (default is "8B"). + quantize (bool, optional): Whether to apply dynamic quantization to the model (default is None). + device (torch.device, optional): The device to load the model onto (default is None). + + Returns: + nn.Module: The constructed and configured transformer model. + """ # Load model from Hugging Face hub model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float16 if "cuda" in str(device) else torch.float32, + model_name, + torch_dtype=torch.float16 if "cuda" in str(device) else torch.float32, device_map="auto" if "cuda" in str(device) else None ) - # Load weights - model_path = Path(model_name) - if model_path.is_dir(): - if (model_path / "pytorch_model.bin.index.json").exists(): - weights = load_weights(str(model_path / "pytorch_model.bin.index.json")) - else: - pth_weights = [] - for i in range(MODEL_PARAMS[model_size]["files"]): - pth_path = str(model_path / f"consolidated.{i:02d}.pth") - pth_weights.append(load_weights(pth_path)) - - weights = concat_weights( - pth_weights, - device[0] if isinstance(device, tuple) else device, - ) - else: - weights = load_weights(str(model_path)) - - if "model.embed_tokens.weight" in weights: - weights = convert_from_huggingface( - weights, - model, - MODEL_PARAMS[model_size]["args"]["n_heads"], - MODEL_PARAMS[model_size]["args"]["n_kv_heads"], - shard=shard, - ) - weights = fix_bf16(weights) - # Quantize the model if specified if quantize: model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}) @@ -224,8 +210,5 @@ def build_transformer(model_name: str, shard: Shard, model_size="8B", quantize=N elif "tok_embeddings.weight" in name or "output.weight" in name: param.data = param.data.chunk(len(device), dim=0) - # Replace weights in model - model.load_state_dict(weights, strict=False) - return model diff --git a/exo/inference/pytorch/test_build_transformer.py b/exo/inference/pytorch/test_build_transformer.py index 1d553f478..981d73b3a 100644 --- a/exo/inference/pytorch/test_build_transformer.py +++ b/exo/inference/pytorch/test_build_transformer.py @@ -14,6 +14,7 @@ class TestBuildTransformer(unittest.TestCase): def test_build_transformer(self, mock_open, mock_from_pretrained, mock_torch_load): # Mocking model and weights mock_model = MagicMock(spec=AutoModelForCausalLM) + mock_model.layers = [MagicMock()] * 2 # Mocking layers attribute mock_from_pretrained.return_value = mock_model mock_weights = { @@ -24,7 +25,7 @@ def test_build_transformer(self, mock_open, mock_from_pretrained, mock_torch_loa mock_torch_load.return_value = mock_weights # Define the shard - shard = Shard(model_id="mock_model", start_layer=0, end_layer=0, n_layers=1) + shard = Shard(model_id="mock_model", start_layer=0, end_layer=1, n_layers=2) # Call the build_transformer function model = build_transformer("mock_model", shard, model_size="8B", quantize=True, device="cpu") From fb7cf22bb3f5422b0f18259c4e10909a3a3d16de Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 4 Aug 2024 23:17:42 -0800 Subject: [PATCH 005/589] created PyTorchDynamicShardInferenceEngine and adding tooling --- exo/inference/pytorch/helpers.py | 27 +++- exo/inference/pytorch/inference.py | 117 +++++++++++++++++- 
.../pytorch/test_build_transformer.py | 36 +----- 3 files changed, 142 insertions(+), 38 deletions(-) diff --git a/exo/inference/pytorch/helpers.py b/exo/inference/pytorch/helpers.py index cb93adc0e..656f307c3 100644 --- a/exo/inference/pytorch/helpers.py +++ b/exo/inference/pytorch/helpers.py @@ -170,7 +170,7 @@ def fix_bf16(weights: Dict[Any, torch.Tensor]) -> Dict[Any, torch.Tensor]: return {k: v.to(torch.float16) if v.dtype == torch.bfloat16 else v for k, v in weights.items()} -def build_transformer(model_name: str, shard: Shard, model_size="8B", quantize=None, device=None): +def build_transformer(model_name: str, quantize=None, device=None): """ Builds a transformer model by loading it from the Hugging Face model hub and applying weight conversion, quantization, and sharding as specified. @@ -212,3 +212,28 @@ def build_transformer(model_name: str, shard: Shard, model_size="8B", quantize=N return model +def shard_model(model: Any, model_name: str, num_shards: int) -> List[Shard]: + # Get the total number of layers + if hasattr(model, 'config'): + n_layers = model.config.num_hidden_layers + else: + raise ValueError("Unable to determine the number of layers in the model") + + # Calculate layers per shard + layers_per_shard = n_layers // num_shards + remainder = n_layers % num_shards + + shards = [] + start_layer = 0 + for i in range(num_shards): + end_layer = start_layer + layers_per_shard - 1 + if i < remainder: + end_layer += 1 + + shard = Shard(model_name, start_layer, end_layer, n_layers) + shards.append(shard) + + start_layer = end_layer + 1 + + return shards + diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index d411481d5..c31950f98 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -6,11 +6,17 @@ import torch from functools import partial from pathlib import Path -from typing import List, Optional, Union, Callable, Dict +from typing import List, Optional, Union, Callable, Dict, Tuple from transformers import AutoTokenizer, AutoModelForCausalLM from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine -# from exo.inference.pytorch.helpers import +from exo.inference.pytorch.helpers import ( + fix_bf16, + build_transformer, + load_weights, + convert_from_huggingface, + MODEL_PARAMS +) # default settings TEMPERATURE = 0 # 0.85 @@ -26,11 +32,110 @@ class PyTorchDynamicShardInferenceEngine(InferenceEngine): def __init__(self): self.shard = None + self.model = None + self.tokenizer = None + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - # async def infer_prompt + async def infer_prompt( + self, + request_id: str, + shard: Shard, + prompt: str, + image_str: Optional[str] = None, + inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + await self.ensure_shard(shard) + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - # async def infer_tensor + input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) + attention_mask = torch.ones_like(input_ids) + + with torch.no_grad(): + outputs = self.model.generate( + input_ids, + attention_mask=attention_mask, + max_new_tokens=1, + do_sample=True, + temperature=0.7, + top_k=50, + top_p=0.95, + pad_token_id=self.tokenizer.eos_token_id, + start_pos=start_pos + ) - # async def ensure_shard + output_token = outputs[0, -1].item() + output_data = np.array([output_token]) + start_pos += 1 - # def set_on_download_progess [is this 
needed?] + is_eos = output_token == self.tokenizer.eos_token_id + + return ( + output_data, + json.dumps({"start_pos": start_pos}), + is_eos + ) + + async def infer_tensor( + self, + request_id: str, + shard: Shard, + input_data: np.ndarray, + inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + await self.ensure_shard(shard) + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + + input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.device) + + with torch.no_grad(): + outputs = self.model.generate( + input_tensor, + max_new_tokens=1, + do_sample=True, + temperature=0.7, + top_k=50, + top_p=0.95, + pad_token_id=self.tokenizer.eos_token_id, + start_pos=start_pos + ) + + output_token = outputs[0, -1].item() + output_data = np.array([output_token]) + start_pos += 1 + + is_eos = output_token == self.tokenizer.eos_token_id + + return ( + output_data, + json.dumps({"start_pos": start_pos}), + is_eos + ) + + async def ensure_shard(self, shard: Shard): + if self.shard == shard: + return + + cache_dir = Path.home() / ".cache" / "huggingface" + model_path = cache_dir / "models--" / shard.model_id.replace('/', '--') + + if not model_path.exists(): + print(f"Downloading PyTorch model {shard.model_id}...") + weights = load_weights(str(model_path / "pytorch_model.bin")) + else: + weights = load_weights(str(model_path / "pytorch_model.bin")) + + model_size = "8B" # Assume 8B model, adjust as needed + n_heads = MODEL_PARAMS[model_size]["args"]["n_heads"] + n_kv_heads = MODEL_PARAMS[model_size]["args"]["n_kv_heads"] + + self.model = build_transformer(shard.model_id, device=self.device) + converted_weights = convert_from_huggingface(weights, self.model, n_heads, n_kv_heads, shard) + converted_weights = fix_bf16(converted_weights) + + self.model.load_state_dict(converted_weights, strict=False) + self.model.to(self.device) + + self.tokenizer = AutoTokenizer.from_pretrained(str(model_path)) + self.shard = shard + + def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): + # This method can be implemented if progress tracking is needed + pass \ No newline at end of file diff --git a/exo/inference/pytorch/test_build_transformer.py b/exo/inference/pytorch/test_build_transformer.py index 981d73b3a..fd0946582 100644 --- a/exo/inference/pytorch/test_build_transformer.py +++ b/exo/inference/pytorch/test_build_transformer.py @@ -2,46 +2,20 @@ from unittest.mock import patch, MagicMock from pathlib import Path import torch -from transformers import AutoModelForCausalLM from exo.inference.shard import Shard from exo.inference.pytorch.helpers import build_transformer class TestBuildTransformer(unittest.TestCase): - @patch('torch.load') - @patch('transformers.AutoModelForCausalLM.from_pretrained') - @patch('builtins.open', new_callable=unittest.mock.mock_open, read_data='{"weight_map": {"0": "pytorch_model.bin"}}') def test_build_transformer(self, mock_open, mock_from_pretrained, mock_torch_load): - # Mocking model and weights - mock_model = MagicMock(spec=AutoModelForCausalLM) - mock_model.layers = [MagicMock()] * 2 # Mocking layers attribute - mock_from_pretrained.return_value = mock_model - - mock_weights = { - "model.embed_tokens.weight": torch.randn(1024, 768), - "model.layers.0.self_attn.q_proj.weight": torch.randn(768, 768), - # Add other necessary mock weights here - } - mock_torch_load.return_value = mock_weights - - # Define the shard - shard = Shard(model_id="mock_model", start_layer=0, end_layer=1, n_layers=2) - # 
Call the build_transformer function - model = build_transformer("mock_model", shard, model_size="8B", quantize=True, device="cpu") - - # Assertions to verify the function behavior - mock_from_pretrained.assert_called_once_with( - "mock_model", - torch_dtype=torch.float32, - device_map=None + model = build_transformer( + "gpt2", + quantize=True, + device="cuda" ) - mock_open.assert_called_once_with("mock_model/pytorch_model.bin.index.json") - mock_torch_load.assert_called() - - mock_model.load_state_dict.assert_called() - self.assertEqual(model, mock_model) + self.assertIsNotNone(model) if __name__ == '__main__': unittest.main() From 5a262cfba7bb7ae0fd15e48c782bc5e506e3de4c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 4 Aug 2024 23:58:08 -0800 Subject: [PATCH 006/589] removing custom sharding from tinygrad inspo and using pytorch FSDP for sharding --- exo/inference/pytorch/helpers.py | 194 +---------------------------- exo/inference/pytorch/inference.py | 85 +++++-------- 2 files changed, 33 insertions(+), 246 deletions(-) diff --git a/exo/inference/pytorch/helpers.py b/exo/inference/pytorch/helpers.py index 656f307c3..32e0572ef 100644 --- a/exo/inference/pytorch/helpers.py +++ b/exo/inference/pytorch/helpers.py @@ -1,174 +1,8 @@ # Helper functions for pytorch inference # Some code coming from tinygrad but written towards pytorch -# import os -# import numpy as np -# import asyncio -import json import torch -# from functools import partial -from pathlib import Path -from typing import List, Union, Dict, Any from transformers import AutoModelForCausalLM -from exo.inference.shard import Shard -# from exo.inference.inference_engine import InferenceEngine - -MODEL_PARAMS = { - "8B": { - "args": { - "dim": 4096, - "n_heads": 32, - "n_kv_heads": 8, - "n_layers": 32, - "norm_eps": 1e-5, - "rope_theta": 500000, - "vocab_size": 128256, - "hidden_dim": 14336, - }, - "files": 1, - }, - "70B": { - "args": { - "dim": 8192, - "n_heads": 64, - "n_kv_heads": 8, - "n_layers": 80, - "norm_eps": 1e-5, - "rope_theta": 500000, - "vocab_size": 128256, - "hidden_dim": 28672, - }, - "files": 8, - }, -} - -def concat_weights(models, device=None): - """ - Concatenates weights from multiple model parts along the appropriate axis. - - Args: - models (List[Dict[str, torch.Tensor]]): List of dictionaries containing model weights. - device (Optional[torch.device]): The device to move the weights to (e.g., 'cpu' or 'cuda'). - - Returns: - Dict[str, torch.Tensor]: A dictionary where the keys are the weight names and the values - are the concatenated tensors moved to the specified device. - """ - def convert(name) -> torch.Tensor: - disk_tensors: List[torch.Tensor] = [model[name] for model in models] - if len(disk_tensors) == 1 or len(disk_tensors[0].shape) == 1: - return disk_tensors[0].to(device=device) - - ewn = name.endswith(".attention.wo.weight") or name.endswith(".feed_forward.w2.weight") - axis = 1 if ewn else 0 - - lazy_tensors = [data.to(device=device) for data in disk_tensors] - return torch.cat(lazy_tensors, dim=axis) - - return {name: convert(name) for name in {name for model in models for name in model}} - -def load_weights(fn: str) -> Union[str, Dict[str, torch.Tensor]]: - """ - Loads model weights from a specified file. Supports both individual model files and - index files that map to multiple weight files. - - Args: - fn (str): The file path to load weights from. - - Returns: - Union[str, Dict[str, torch.Tensor]]: A string representing the model or a - dictionary of model weights. 
- """ - model = "" - if fn.endswith("pytorch_model.bin.index.json"): - with open(fn) as fp: - weight_map = json.load(fp)["weight_map"] - - for n in set(weight_map.values()): - full_path = str(Path(fn).parent / Path(n).name) - parts = {n: torch.load(full_path, map_location="cpu")} - - return {k: parts[n][k] for k, n in weight_map.items()} - else: - model = torch.load(fn, map_location="cpu") - return model - -def convert_from_huggingface( - weights: Dict[str, torch.Tensor], - model: torch.nn.Module, - n_heads: int, - n_kv_heads: int, - shard: Shard) -> Dict[str, torch.Tensor]: - """ - Converts Hugging Face model weights to the format expected by the target model. - - Args: - weights (Dict[str, torch.Tensor]): Dictionary of Hugging Face model weights. - model (nn.Module): The target model. - n_heads (int): Number of attention heads. - n_kv_heads (int): Number of key-value heads. - shard (Shard): Shard object containing information about the model shard. - - Returns: - Dict[str, torch.Tensor]: Dictionary of converted weights. - """ - def permute(v: torch.Tensor, n_heads: int) -> torch.Tensor: - return v.view( - n_heads, - 2, - v.shape[0] // (2 * n_heads), - v.shape[1] - ).transpose(1, 2).reshape(*v.shape) - - keymap = { - "model.embed_tokens.weight": "tok_embeddings.weight", - **{f"model.layers.{l}.input_layernorm.weight": f"layers.{l}.attention_norm.weight" for l in range(len(model.layers))}, - **{f"model.layers.{l}.self_attn.{x}_proj.weight": f"layers.{l}.attention.w_{x}.weight" for x in ["q", "k", "v", "o"] for l in range(len(model.layers))}, - **{f"model.layers.{l}.post_attention_layernorm.weight": f"layers.{l}.ffn_norm.weight" for l in range(len(model.layers))}, - **{f"model.layers.{l}.mlp.{x}_proj.weight": f"layers.{l}.feed_forward.w_{y}.weight" for x, y in {"gate": "1", "down": "2", "up": "3"}.items() for l in range(len(model.layers))}, - "model.norm.weight": "norm.weight", - "lm_head.weight": "output.weight", - } - - sd = {} - for k, v in weights.items(): - if ".rotary_emb." in k: - continue - - if "model.layers" in k: - layer_num = int(k.split(".")[2]) - if shard.start_layer <= layer_num <= shard.end_layer: - k = f"model.layers.{layer_num - shard.start_layer}." + ".".join(k.split(".")[3:]) - else: - continue - - if "q_proj" in k: - v = permute(v, n_heads) - elif "k_proj" in k: - v = permute(v, n_kv_heads) - - if k in keymap: - sd[keymap[k]] = v - - return sd - -def fix_bf16(weights: Dict[Any, torch.Tensor]) -> Dict[Any, torch.Tensor]: - """ - Converts weights to bfloat16 if supported by the device, otherwise to float16. - - Args: - weights (Dict[Any, torch.Tensor]): Dictionary of model weights. - - Returns: - Dict[Any, torch.Tensor]: Dictionary of converted weights. 
- """ - supports_bf16 = torch.cuda.is_bf16_supported() - - if supports_bf16: - return {k: v.to(torch.bfloat16) if v.dtype == torch.float32 else v for k, v in weights.items()} - else: - return {k: v.to(torch.float16) if v.dtype == torch.bfloat16 else v for k, v in weights.items()} - def build_transformer(model_name: str, quantize=None, device=None): """ @@ -210,30 +44,4 @@ def build_transformer(model_name: str, quantize=None, device=None): elif "tok_embeddings.weight" in name or "output.weight" in name: param.data = param.data.chunk(len(device), dim=0) - return model - -def shard_model(model: Any, model_name: str, num_shards: int) -> List[Shard]: - # Get the total number of layers - if hasattr(model, 'config'): - n_layers = model.config.num_hidden_layers - else: - raise ValueError("Unable to determine the number of layers in the model") - - # Calculate layers per shard - layers_per_shard = n_layers // num_shards - remainder = n_layers % num_shards - - shards = [] - start_layer = 0 - for i in range(num_shards): - end_layer = start_layer + layers_per_shard - 1 - if i < remainder: - end_layer += 1 - - shard = Shard(model_name, start_layer, end_layer, n_layers) - shards.append(shard) - - start_layer = end_layer + 1 - - return shards - + return model \ No newline at end of file diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index c31950f98..e2b873f39 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,49 +1,39 @@ # experimental, based off of tinygrad/inference.py -import os +# utilizing pytorch FSDP for sharding + import numpy as np -import asyncio import json import torch -from functools import partial -from pathlib import Path -from typing import List, Optional, Union, Callable, Dict, Tuple -from transformers import AutoTokenizer, AutoModelForCausalLM +from typing import Optional, Callable, Tuple +from transformers import AutoTokenizer from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine -from exo.inference.pytorch.helpers import ( - fix_bf16, - build_transformer, - load_weights, - convert_from_huggingface, - MODEL_PARAMS -) - -# default settings -TEMPERATURE = 0 # 0.85 -TOP_K = 25 -TOP_P = 0.9 +from exo.inference.pytorch.helpers import build_transformer +from torch.distributed.fsdp import FullyShardedDataParallel as FSDP +from torch.distributed.fsdp.wrap import auto_wrap_policy +from torch.distributed import init_process_group, destroy_process_group + +# Default settings +TEMPERATURE = 0.7 +TOP_K = 50 +TOP_P = 0.95 ALPHA_F = 0.1 ALPHA_P = 0.0 - -# don't think prefill is needed -# think that is used for stats but will look into - class PyTorchDynamicShardInferenceEngine(InferenceEngine): def __init__(self): self.shard = None self.model = None self.tokenizer = None self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Initialize process group + init_process_group(backend='nccl' if torch.cuda.is_available() else 'gloo') async def infer_prompt( self, - request_id: str, - shard: Shard, prompt: str, - image_str: Optional[str] = None, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: - await self.ensure_shard(shard) start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) @@ -55,9 +45,9 @@ async def infer_prompt( attention_mask=attention_mask, max_new_tokens=1, do_sample=True, - temperature=0.7, - top_k=50, - top_p=0.95, + 
temperature=TEMPERATURE, + top_k=TOP_K, + top_p=TOP_P, pad_token_id=self.tokenizer.eos_token_id, start_pos=start_pos ) @@ -90,9 +80,9 @@ async def infer_tensor( input_tensor, max_new_tokens=1, do_sample=True, - temperature=0.7, - top_k=50, - top_p=0.95, + temperature=TEMPERATURE, + top_k=TOP_K, + top_p=TOP_P, pad_token_id=self.tokenizer.eos_token_id, start_pos=start_pos ) @@ -113,29 +103,18 @@ async def ensure_shard(self, shard: Shard): if self.shard == shard: return - cache_dir = Path.home() / ".cache" / "huggingface" - model_path = cache_dir / "models--" / shard.model_id.replace('/', '--') - - if not model_path.exists(): - print(f"Downloading PyTorch model {shard.model_id}...") - weights = load_weights(str(model_path / "pytorch_model.bin")) - else: - weights = load_weights(str(model_path / "pytorch_model.bin")) - - model_size = "8B" # Assume 8B model, adjust as needed - n_heads = MODEL_PARAMS[model_size]["args"]["n_heads"] - n_kv_heads = MODEL_PARAMS[model_size]["args"]["n_kv_heads"] - - self.model = build_transformer(shard.model_id, device=self.device) - converted_weights = convert_from_huggingface(weights, self.model, n_heads, n_kv_heads, shard) - converted_weights = fix_bf16(converted_weights) - - self.model.load_state_dict(converted_weights, strict=False) - self.model.to(self.device) + # Load model and tokenizer from Hugging Face hub + self.model = build_transformer(shard.model_id, shard, device=self.device) + + # Wrap the model with FSDP + self.model = FSDP(self.model, auto_wrap_policy=auto_wrap_policy) - self.tokenizer = AutoTokenizer.from_pretrained(str(model_path)) + self.tokenizer = AutoTokenizer.from_pretrained(shard.model_id) self.shard = shard def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): # This method can be implemented if progress tracking is needed - pass \ No newline at end of file + pass + + def __del__(self): + destroy_process_group() From ec1f656202c3622297f0fe3383c176554de2b554 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 5 Aug 2024 00:03:12 -0800 Subject: [PATCH 007/589] fixing infer_prompt, working on testing, cleaning up PyTorchDynamicShardInferenceEngine --- exo/inference/pytorch/inference.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index e2b873f39..ffe9f9d8a 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -32,8 +32,14 @@ def __init__(self): async def infer_prompt( self, + request_id: str, + shard: Shard, prompt: str, + image_str: Optional[str] = None, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + + await self.ensure_shard(shard) + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) @@ -70,7 +76,9 @@ async def infer_tensor( shard: Shard, input_data: np.ndarray, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + await self.ensure_shard(shard) + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.device) From a612b1fe5520e55adaa98bb4c2316b3c198d2560 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 5 Aug 2024 00:04:44 -0800 Subject: [PATCH 008/589] fixing test --- exo/inference/pytorch/test_build_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/test_build_transformer.py 
b/exo/inference/pytorch/test_build_transformer.py index fd0946582..cdbfa6fc4 100644 --- a/exo/inference/pytorch/test_build_transformer.py +++ b/exo/inference/pytorch/test_build_transformer.py @@ -7,7 +7,7 @@ class TestBuildTransformer(unittest.TestCase): - def test_build_transformer(self, mock_open, mock_from_pretrained, mock_torch_load): + def test_build_transformer(self): # Call the build_transformer function model = build_transformer( "gpt2", From d904caf9ea6805c14bb1b502c605c9ec439b8f49 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 5 Aug 2024 00:06:25 -0800 Subject: [PATCH 009/589] removing pytorch from setup.py, will need to think of another way to install right pytorch version --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 892548b1c..3ea7c8acf 100644 --- a/setup.py +++ b/setup.py @@ -26,8 +26,7 @@ "tqdm==4.66.4", "transformers==4.43.3", "uuid==1.30", - "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@639af3f823cf242a1945dc24183e52a9df0af2b7", - "torch==2.4.0" + "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@639af3f823cf242a1945dc24183e52a9df0af2b7" ] # Add macOS-specific packages if on Darwin (macOS) From 1fabef2067022b78067b82497ce2d37632862571 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 5 Aug 2024 01:06:45 -0800 Subject: [PATCH 010/589] working more on engine, removing pytorch fsd as exo is bascially what it is trying to do, implementing exo sharding next --- exo/inference/pytorch/helpers.py | 14 --------- exo/inference/pytorch/inference.py | 46 +++++++--------------------- exo/inference/pytorch/test_engine.py | 17 ++++++++++ 3 files changed, 28 insertions(+), 49 deletions(-) create mode 100644 exo/inference/pytorch/test_engine.py diff --git a/exo/inference/pytorch/helpers.py b/exo/inference/pytorch/helpers.py index 32e0572ef..27700509f 100644 --- a/exo/inference/pytorch/helpers.py +++ b/exo/inference/pytorch/helpers.py @@ -30,18 +30,4 @@ def build_transformer(model_name: str, quantize=None, device=None): if quantize: model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}) - # Shard the model if using multiple devices - if isinstance(device, tuple): - for name, param in model.named_parameters(): - if "scale" in name: - param.data = param.data.chunk(len(device), dim=0) - elif ".attention." in name: - param.data = param.data.chunk(len(device), dim=-1) - elif ".feed_forward.w1." in name or ".feed_forward.w3." in name: - param.data = param.data.chunk(len(device), dim=0) - elif ".feed_forward." 
in name: - param.data = param.data.chunk(len(device), dim=-1) - elif "tok_embeddings.weight" in name or "output.weight" in name: - param.data = param.data.chunk(len(device), dim=0) - return model \ No newline at end of file diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index ffe9f9d8a..69b63a133 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,17 +1,17 @@ # experimental, based off of tinygrad/inference.py # utilizing pytorch FSDP for sharding +# look into shard being optional for the inferece import numpy as np import json import torch +import functools +import os from typing import Optional, Callable, Tuple from transformers import AutoTokenizer from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.helpers import build_transformer -from torch.distributed.fsdp import FullyShardedDataParallel as FSDP -from torch.distributed.fsdp.wrap import auto_wrap_policy -from torch.distributed import init_process_group, destroy_process_group # Default settings TEMPERATURE = 0.7 @@ -21,25 +21,20 @@ ALPHA_P = 0.0 class PyTorchDynamicShardInferenceEngine(InferenceEngine): - def __init__(self): - self.shard = None - self.model = None - self.tokenizer = None - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - # Initialize process group - init_process_group(backend='nccl' if torch.cuda.is_available() else 'gloo') + def __init__(self, model_name: str = "gpt2", device: str = "cuda", tokenizer: str="gpt2"): + self.device = device + self.model_name = model_name + self.shard = Shard(model_id=model_name, start_layer=0, end_layer=1, n_layers=2) + self.model = build_transformer(self.shard.model_id, self.shard, device=self.device) + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer) async def infer_prompt( self, request_id: str, shard: Shard, prompt: str, - image_str: Optional[str] = None, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: - await self.ensure_shard(shard) - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) @@ -54,8 +49,7 @@ async def infer_prompt( temperature=TEMPERATURE, top_k=TOP_K, top_p=TOP_P, - pad_token_id=self.tokenizer.eos_token_id, - start_pos=start_pos + pad_token_id=self.tokenizer.eos_token_id ) output_token = outputs[0, -1].item() @@ -77,8 +71,6 @@ async def infer_tensor( input_data: np.ndarray, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: - await self.ensure_shard(shard) - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.device) @@ -107,22 +99,6 @@ async def infer_tensor( is_eos ) - async def ensure_shard(self, shard: Shard): - if self.shard == shard: - return - - # Load model and tokenizer from Hugging Face hub - self.model = build_transformer(shard.model_id, shard, device=self.device) - - # Wrap the model with FSDP - self.model = FSDP(self.model, auto_wrap_policy=auto_wrap_policy) - - self.tokenizer = AutoTokenizer.from_pretrained(shard.model_id) - self.shard = shard - def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): # This method can be implemented if progress tracking is needed - pass - - def __del__(self): - destroy_process_group() + pass \ No newline at end of file diff --git 
a/exo/inference/pytorch/test_engine.py b/exo/inference/pytorch/test_engine.py new file mode 100644 index 000000000..838958d42 --- /dev/null +++ b/exo/inference/pytorch/test_engine.py @@ -0,0 +1,17 @@ +import unittest +from .inference import PyTorchDynamicShardInferenceEngine +from exo.inference.shard import Shard +import asyncio + +class TestPyTorchDynamicShardInferenceEngine(unittest.TestCase): + def test_one(self): + shard = Shard(model_id="mock_model", start_layer=0, end_layer=1, n_layers=2) + engine = PyTorchDynamicShardInferenceEngine() + prompt_resp = asyncio.run( + engine.infer_prompt( + "", + shard, + "Why is the sky blue?") + ) + + self.assertIsNotNone(prompt_resp) From 1aff2e9425ccd5787e3800bc660f0482d5a2a479 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 19:28:04 -0800 Subject: [PATCH 011/589] building out engine more with sharding implemented and kv caching, added a llama focused sharding model and started work on a more general hugging face sharding model --- exo/inference/pytorch/helpers.py | 47 ++--- exo/inference/pytorch/inference.py | 194 +++++++++++------- exo/inference/pytorch/model/hf.py | 52 +++++ exo/inference/pytorch/model/llama.py | 41 ++++ exo/inference/pytorch/test_engine.py | 17 -- .../pytorch/test_inference_engine.py | 55 +++++ 6 files changed, 291 insertions(+), 115 deletions(-) create mode 100644 exo/inference/pytorch/model/hf.py create mode 100644 exo/inference/pytorch/model/llama.py delete mode 100644 exo/inference/pytorch/test_engine.py create mode 100644 exo/inference/pytorch/test_inference_engine.py diff --git a/exo/inference/pytorch/helpers.py b/exo/inference/pytorch/helpers.py index 27700509f..addea2db7 100644 --- a/exo/inference/pytorch/helpers.py +++ b/exo/inference/pytorch/helpers.py @@ -1,33 +1,24 @@ # Helper functions for pytorch inference # Some code coming from tinygrad but written towards pytorch -import torch -from transformers import AutoModelForCausalLM +import asyncio +import aiohttp +from tqdm import tqdm +from pathlib import Path +from typing import List -def build_transformer(model_name: str, quantize=None, device=None): - """ - Builds a transformer model by loading it from the Hugging Face model hub and applying - weight conversion, quantization, and sharding as specified. +async def fetch_file_async(session, url: str, output_path: Path): + async with session.get(url) as response: + response.raise_for_status() + with open(output_path, 'wb') as f: + async for chunk in response.content.iter_chunked(8192): + f.write(chunk) - Args: - model_name (str): The name of the model to load from the Hugging Face model hub. - shard (Shard): A Shard object containing information about the model shard. - model_size (str, optional): The size of the model to load (default is "8B"). - quantize (bool, optional): Whether to apply dynamic quantization to the model (default is None). - device (torch.device, optional): The device to load the model onto (default is None). - - Returns: - nn.Module: The constructed and configured transformer model. 
- """ - # Load model from Hugging Face hub - model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float16 if "cuda" in str(device) else torch.float32, - device_map="auto" if "cuda" in str(device) else None - ) - - # Quantize the model if specified - if quantize: - model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}) - - return model \ No newline at end of file +async def download_files(urls: List[str], output_paths: List[Path]): + async with aiohttp.ClientSession() as session: + tasks = [] + for url, output_path in zip(urls, output_paths): + tasks.append(fetch_file_async(session, url, output_path)) + + for f in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Downloading files"): + await f diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 69b63a133..306758e74 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -2,103 +2,157 @@ # utilizing pytorch FSDP for sharding # look into shard being optional for the inferece -import numpy as np +import os +import shutil import json import torch -import functools -import os +import numpy as np +from pathlib import Path from typing import Optional, Callable, Tuple -from transformers import AutoTokenizer +from transformers import AutoTokenizer, AutoModelForCausalLM from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine -from exo.inference.pytorch.helpers import build_transformer +from exo.inference.pytorch.helpers import download_files +from exo.inference.pytorch.model.llama import ShardedLLAMAModel # Default settings TEMPERATURE = 0.7 TOP_K = 50 -TOP_P = 0.95 -ALPHA_F = 0.1 -ALPHA_P = 0.0 class PyTorchDynamicShardInferenceEngine(InferenceEngine): - def __init__(self, model_name: str = "gpt2", device: str = "cuda", tokenizer: str="gpt2"): - self.device = device - self.model_name = model_name - self.shard = Shard(model_id=model_name, start_layer=0, end_layer=1, n_layers=2) - self.model = build_transformer(self.shard.model_id, self.shard, device=self.device) - self.tokenizer = AutoTokenizer.from_pretrained(tokenizer) + def __init__(self, debug: bool = False): + self.shard = None + self.debug = debug async def infer_prompt( self, request_id: str, - shard: Shard, + shard: Optional[Shard], prompt: str, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: - - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + await self.ensure_shard(shard) - input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) - attention_mask = torch.ones_like(input_ids) + input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device) - with torch.no_grad(): - outputs = self.model.generate( - input_ids, - attention_mask=attention_mask, - max_new_tokens=1, - do_sample=True, - temperature=TEMPERATURE, - top_k=TOP_K, - top_p=TOP_P, - pad_token_id=self.tokenizer.eos_token_id - ) - - output_token = outputs[0, -1].item() - output_data = np.array([output_token]) - start_pos += 1 - - is_eos = output_token == self.tokenizer.eos_token_id - - return ( - output_data, - json.dumps({"start_pos": start_pos}), - is_eos - ) + # Continue the sequence if inference state exists + past_key_values = None + if inference_state: + past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) + + output, past_key_values = self.model(input_ids, past_key_values=past_key_values) + + if self.shard.is_last_layer(): + 
logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) + next_token = torch.argmax(logits[:, -1, :], dim=-1) + output_data = np.array([next_token.item()]) + is_eos = next_token.item() == self.tokenizer.eos_token_id + else: + output_data = output.cpu().numpy() + is_eos = False + + new_inference_state = json.dumps({"past_key_values": self._save_kv_cache(past_key_values)}) + + if self.debug: + print(f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + + return output_data, new_inference_state, is_eos async def infer_tensor( self, request_id: str, - shard: Shard, + shard: Optional[Shard], input_data: np.ndarray, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: - - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + await self.ensure_shard(shard) - input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.device) + input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) - with torch.no_grad(): - outputs = self.model.generate( - input_tensor, - max_new_tokens=1, - do_sample=True, - temperature=TEMPERATURE, - top_k=TOP_K, - top_p=TOP_P, - pad_token_id=self.tokenizer.eos_token_id, - start_pos=start_pos - ) - - output_token = outputs[0, -1].item() - output_data = np.array([output_token]) - start_pos += 1 - - is_eos = output_token == self.tokenizer.eos_token_id - - return ( - output_data, - json.dumps({"start_pos": start_pos}), - is_eos - ) + # Continue the sequence if inference state exists + past_key_values = None + if inference_state: + past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) + + output, past_key_values = self.model(input_tensor, past_key_values=past_key_values) + + if self.shard.is_last_layer(): + logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) + next_token = torch.argmax(logits[:, -1, :], dim=-1) + output_data = np.array([next_token.item()]) + is_eos = next_token.item() == self.tokenizer.eos_token_id + else: + output_data = output.cpu().numpy() + is_eos = False + + new_inference_state = json.dumps({"past_key_values": self._save_kv_cache(past_key_values)}) + + if self.debug: + print(f"Infer Tensor Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + + return output_data, new_inference_state, is_eos + + def _apply_generation_settings(self, logits, temperature, top_k): + logits = logits / temperature + if top_k > 0: + top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) + logits = logits.scatter(1, top_k_indices, top_k_values) + return logits + + def _load_kv_cache(self, past_key_values_list): + if past_key_values_list is None: + return None + return [torch.tensor(kv, device=self.model.device) for kv in past_key_values_list] + + def _save_kv_cache(self, past_key_values): + return [kv.cpu().tolist() for kv in past_key_values] + + async def ensure_shard(self, shard: Optional[Shard]): + if self.shard == shard: + return + + model_path = Path(f".cache/{shard.model_id}") + if not model_path.exists(): + os.makedirs(model_path, exist_ok=True) + else: + shutil.rmtree(model_path) + os.makedirs(model_path) + + if shard.model_id.lower().find("llama3-8b-sfr") != -1: + num_files = 4 + urls = [ + f"https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/model-{(i+1):05d}-of-{num_files:05d}.safetensors" + for i in range(num_files) + ] + urls.extend([ + "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/config.json", + 
"https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/raw/main/model.safetensors.index.json", + "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/special_tokens_map.json", + "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/tokenizer.json", + "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/tokenizer_config.json" + ]) + + output_paths = [ + model_path / f"model-{(i+1):05d}-of-{num_files:05d}.safetensors" + for i in range(num_files) + ] + output_paths.extend([ + model_path / "config.json", + model_path / "model.safetensors.index.json", + model_path / "special_tokens_map.json", + model_path / "tokenizer.json", + model_path / "tokenizer_config.json" + ]) + + await download_files(urls, output_paths) + else: + raise ValueError(f"Unsupported model: {shard.model_id}") + + # Load model and tokenizer from the downloaded files + model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32) + self.model = ShardedLLAMAModel(model, shard) + self.tokenizer = AutoTokenizer.from_pretrained(model_path) + + self.shard = shard def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): # This method can be implemented if progress tracking is needed - pass \ No newline at end of file + pass diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py new file mode 100644 index 000000000..ba838be86 --- /dev/null +++ b/exo/inference/pytorch/model/hf.py @@ -0,0 +1,52 @@ +# Work in progress on a generic hugging face model sharder +# right now doesn't work with all models + +import torch +import torch.nn as nn +from transformers import AutoModelForCausalLM +from exo.inference.shard import Shard +import logging + +class ShardedHuggingFaceModel(nn.Module): + def __init__(self, model_name: str, shard: Shard): + super(ShardedHuggingFaceModel, self).__init__() + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.shard = shard + + # Load the model + self.model = AutoModelForCausalLM.from_pretrained(model_name) + + # Only keep layers corresponding to this shard + self.layers = nn.ModuleList([ + self.model.transformer.h[i] for i in range(shard.start_layer, shard.end_layer + 1) + ]) + + logging.info(f"layers: {self.layers}") + + self.model.transformer.wte.to(self.device) + self.model.transformer.wpe.to(self.device) + + def forward(self, input_ids, past_key_values=None): + hidden_states = self._get_initial_hidden_states(input_ids) + hidden_states, new_past_key_values = self._process_layers(hidden_states, past_key_values) + + if self.shard.is_last_layer(): + hidden_states = self.model.transformer.ln_f(hidden_states.to(self.device)) + logits = self.model.lm_head(hidden_states) + return logits, new_past_key_values + else: + return hidden_states, new_past_key_values + + def _get_initial_hidden_states(self, input_ids): + input_embeds = self.model.transformer.wte(input_ids.to(self.device)) + position_embeds = self.model.transformer.wpe(torch.arange(input_ids.shape[1], device=self.device)) + return input_embeds + position_embeds + + def _process_layers(self, hidden_states, past_key_values): + new_past_key_values = [] + for i, layer in enumerate(self.layers): + layer_past = past_key_values[i] if past_key_values else None + hidden_states, new_layer_past = layer(hidden_states, past_key_values=layer_past) + new_past_key_values.append(new_layer_past) + return hidden_states, new_past_key_values + diff 
--git a/exo/inference/pytorch/model/llama.py b/exo/inference/pytorch/model/llama.py new file mode 100644 index 000000000..2871e357b --- /dev/null +++ b/exo/inference/pytorch/model/llama.py @@ -0,0 +1,41 @@ +import torch +import torch.nn as nn +from exo.inference.shard import Shard + +class ShardedLLAMAModel(nn.Module): + def __init__(self, model, shard: Shard): + super(ShardedLLAMAModel, self).__init__() + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.shard = shard + + # Only keep layers corresponding to this shard + self.layers = nn.ModuleList([model.transformer.h[i] for i in range(shard.start_layer, shard.end_layer + 1)]) + + # Move embeddings to the appropriate device + self.model = model + self.model.transformer.wte.to(self.device) + self.model.transformer.wpe.to(self.device) + + def forward(self, input_ids, past_key_values=None): + hidden_states = self._get_initial_hidden_states(input_ids) + hidden_states, new_past_key_values = self._process_layers(hidden_states, past_key_values) + + if self.shard.is_last_layer(): + hidden_states = self.model.transformer.ln_f(hidden_states.to(self.device)) + logits = self.model.lm_head(hidden_states) + return logits, new_past_key_values + else: + return hidden_states, new_past_key_values + + def _get_initial_hidden_states(self, input_ids): + input_embeds = self.model.transformer.wte(input_ids.to(self.device)) + position_embeds = self.model.transformer.wpe(torch.arange(input_ids.shape[1], device=self.device)) + return input_embeds + position_embeds + + def _process_layers(self, hidden_states, past_key_values): + new_past_key_values = [] + for i, layer in enumerate(self.layers): + layer_past = past_key_values[i] if past_key_values else None + hidden_states, new_layer_past = layer(hidden_states, past_key_values=layer_past) + new_past_key_values.append(new_layer_past) + return hidden_states, new_past_key_values diff --git a/exo/inference/pytorch/test_engine.py b/exo/inference/pytorch/test_engine.py deleted file mode 100644 index 838958d42..000000000 --- a/exo/inference/pytorch/test_engine.py +++ /dev/null @@ -1,17 +0,0 @@ -import unittest -from .inference import PyTorchDynamicShardInferenceEngine -from exo.inference.shard import Shard -import asyncio - -class TestPyTorchDynamicShardInferenceEngine(unittest.TestCase): - def test_one(self): - shard = Shard(model_id="mock_model", start_layer=0, end_layer=1, n_layers=2) - engine = PyTorchDynamicShardInferenceEngine() - prompt_resp = asyncio.run( - engine.infer_prompt( - "", - shard, - "Why is the sky blue?") - ) - - self.assertIsNotNone(prompt_resp) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py new file mode 100644 index 000000000..3be152762 --- /dev/null +++ b/exo/inference/pytorch/test_inference_engine.py @@ -0,0 +1,55 @@ +import unittest +import asyncio +from exo.inference.shard import Shard +from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine + +class TestPyTorchDynamicShardInferenceEngine(unittest.TestCase): + + @classmethod + def setUpClass(cls): + + # Create a shard + cls.shard = Shard( + model_id="llama3-8b-sfr", + start_layer=0, + end_layer=0, + n_layers=12 + ) + + # Initialize the inference engine + cls.engine = PyTorchDynamicShardInferenceEngine(debug=True) + + def test_infer_prompt(self): + # Prepare the prompt + prompt = "Why is the sky blue?" 
+ + # Run inference + loop = asyncio.get_event_loop() + output_data, new_inference_state, is_eos = loop.run_until_complete( + self.engine.infer_prompt( + request_id="test_request", shard=self.shard, prompt=prompt + ) + ) + + # Assertions + self.assertIsNotNone(output_data) + self.assertIsNotNone(new_inference_state) + self.assertFalse(is_eos) + + # def test_infer_tensor(self): + # # Prepare the input tensor + # input_ids = self.tokenizer.encode("Hello, world!", return_tensors="pt").numpy() + + # # Run inference + # loop = asyncio.get_event_loop() + # output_data, new_inference_state, is_eos = loop.run_until_complete(self.engine.infer_tensor( + # request_id="test_request", shard=self.shard, input_data=input_ids + # )) + + # # Assertions + # self.assertIsNotNone(output_data) + # self.assertIsNotNone(new_inference_state) + # self.assertFalse(is_eos) + +if __name__ == '__main__': + unittest.main() From 4b11fe71ef25a7d2ffc1a07bedabcd5706357f7d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 22:32:43 -0800 Subject: [PATCH 012/589] rebuilding based on LlamaForCausalLM and redefining its forward --- exo/inference/pytorch/inference.py | 95 +++++++++++++++++++++++----- exo/inference/pytorch/model/llama.py | 71 ++++++++++++++------- 2 files changed, 128 insertions(+), 38 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 306758e74..0fe932bc1 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,7 +1,3 @@ -# experimental, based off of tinygrad/inference.py -# utilizing pytorch FSDP for sharding -# look into shard being optional for the inferece - import os import shutil import json @@ -9,18 +5,27 @@ import numpy as np from pathlib import Path from typing import Optional, Callable, Tuple -from transformers import AutoTokenizer, AutoModelForCausalLM +from transformers import AutoTokenizer, LlamaForCausalLM from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.helpers import download_files -from exo.inference.pytorch.model.llama import ShardedLLAMAModel # Default settings TEMPERATURE = 0.7 TOP_K = 50 class PyTorchDynamicShardInferenceEngine(InferenceEngine): + """ + PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. + """ + def __init__(self, debug: bool = False): + """ + Initialize the inference engine. + + Args: + debug (bool): If True, enables debug logging. Defaults to False. + """ self.shard = None self.debug = debug @@ -30,6 +35,18 @@ async def infer_prompt( shard: Optional[Shard], prompt: str, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + """ + Perform inference based on a text prompt. + + Args: + request_id (str): Unique identifier for the request. + shard (Optional[Shard]): Shard information for the model. + prompt (str): The input text prompt for inference. + inference_state (Optional[str]): The previous inference state. + + Returns: + Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. + """ await self.ensure_shard(shard) input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device) @@ -63,6 +80,18 @@ async def infer_tensor( shard: Optional[Shard], input_data: np.ndarray, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + """ + Perform inference based on an input tensor. + + Args: + request_id (str): Unique identifier for the request. 
+ shard (Optional[Shard]): Shard information for the model. + input_data (np.ndarray): The input tensor for inference. + inference_state (Optional[str]): The previous inference state. + + Returns: + Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. + """ await self.ensure_shard(shard) input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) @@ -91,6 +120,17 @@ async def infer_tensor( return output_data, new_inference_state, is_eos def _apply_generation_settings(self, logits, temperature, top_k): + """ + Apply temperature and top_k settings to logits. + + Args: + logits (torch.Tensor): The logits to be adjusted. + temperature (float): The temperature setting for generation. + top_k (int): The top_k setting for generation. + + Returns: + torch.Tensor: The adjusted logits. + """ logits = logits / temperature if top_k > 0: top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) @@ -98,23 +138,47 @@ def _apply_generation_settings(self, logits, temperature, top_k): return logits def _load_kv_cache(self, past_key_values_list): + """ + Load key-value cache from the inference state. + + Args: + past_key_values_list (list): List of past key-value tensors. + + Returns: + list: List of loaded past key-value tensors. + """ if past_key_values_list is None: return None return [torch.tensor(kv, device=self.model.device) for kv in past_key_values_list] def _save_kv_cache(self, past_key_values): + """ + Save key-value cache to the inference state. + + Args: + past_key_values (list): List of past key-value tensors. + + Returns: + list: List of key-value tensors in a format suitable for saving. + """ return [kv.cpu().tolist() for kv in past_key_values] async def ensure_shard(self, shard: Optional[Shard]): + """ + Ensure the model shard is loaded and ready for inference. + + Args: + shard (Optional[Shard]): Shard information for the model. 
+ """ if self.shard == shard: return - model_path = Path(f".cache/{shard.model_id}") + model_path = Path(self.model_name) + models_dir = Path(__file__).parent / "temp_model_dir" + model_path = models_dir / shard.model_id + if not model_path.exists(): os.makedirs(model_path, exist_ok=True) - else: - shutil.rmtree(model_path) - os.makedirs(model_path) if shard.model_id.lower().find("llama3-8b-sfr") != -1: num_files = 4 @@ -147,12 +211,11 @@ async def ensure_shard(self, shard: Optional[Shard]): raise ValueError(f"Unsupported model: {shard.model_id}") # Load model and tokenizer from the downloaded files - model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32) - self.model = ShardedLLAMAModel(model, shard) + # This is written for llama model but need to add in option for others + self.model = LlamaForCausalLM.from_pretrained( + model_path, + torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32) + self.tokenizer = AutoTokenizer.from_pretrained(model_path) self.shard = shard - - def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): - # This method can be implemented if progress tracking is needed - pass diff --git a/exo/inference/pytorch/model/llama.py b/exo/inference/pytorch/model/llama.py index 2871e357b..01a694454 100644 --- a/exo/inference/pytorch/model/llama.py +++ b/exo/inference/pytorch/model/llama.py @@ -1,41 +1,68 @@ import torch import torch.nn as nn +from transformers.models.llama.modeling_llama import LlamaForCausalLM from exo.inference.shard import Shard class ShardedLLAMAModel(nn.Module): - def __init__(self, model, shard: Shard): + """ + Sharded LLAMA Model for performing inference with a subset of model layers. + """ + + def __init__(self, model_path: str, shard: Shard): + """ + Initialize the ShardedLLAMAModel. + + Args: + model_path (str): Path to the pretrained model. + shard (Shard): Shard information indicating which layers to include. + """ super(ShardedLLAMAModel, self).__init__() self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard - # Only keep layers corresponding to this shard - self.layers = nn.ModuleList([model.transformer.h[i] for i in range(shard.start_layer, shard.end_layer + 1)]) + # Load the full model and move to device + self.full_model = LlamaForCausalLM.from_pretrained(model_path) + self.full_model.to(self.device) - # Move embeddings to the appropriate device - self.model = model - self.model.transformer.wte.to(self.device) - self.model.transformer.wpe.to(self.device) + # Extract only the layers for this shard + self.layers = nn.ModuleList([ + self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) + ]) + + # Embeddings and final layer norm + self.embed_tokens = self.full_model.model.embed_tokens + self.embed_positions = self.full_model.model.embed_positions + self.norm = self.full_model.model.norm + self.lm_head = self.full_model.lm_head def forward(self, input_ids, past_key_values=None): - hidden_states = self._get_initial_hidden_states(input_ids) - hidden_states, new_past_key_values = self._process_layers(hidden_states, past_key_values) + """ + Perform a forward pass through the model. 
- if self.shard.is_last_layer(): - hidden_states = self.model.transformer.ln_f(hidden_states.to(self.device)) - logits = self.model.lm_head(hidden_states) - return logits, new_past_key_values - else: - return hidden_states, new_past_key_values + Args: + input_ids (torch.Tensor): Input token IDs. + past_key_values (list, optional): List of past key-value states for attention layers. - def _get_initial_hidden_states(self, input_ids): - input_embeds = self.model.transformer.wte(input_ids.to(self.device)) - position_embeds = self.model.transformer.wpe(torch.arange(input_ids.shape[1], device=self.device)) - return input_embeds + position_embeds + Returns: + tuple: Output logits or hidden states and the new past key-values. + """ + if past_key_values is None: + past_key_values = [None] * len(self.layers) - def _process_layers(self, hidden_states, past_key_values): + # Token and position embeddings + hidden_states = self.embed_tokens(input_ids) + self.embed_positions(input_ids) + + # Apply each layer in this shard new_past_key_values = [] for i, layer in enumerate(self.layers): - layer_past = past_key_values[i] if past_key_values else None + layer_past = past_key_values[i] hidden_states, new_layer_past = layer(hidden_states, past_key_values=layer_past) new_past_key_values.append(new_layer_past) - return hidden_states, new_past_key_values + + if self.shard.is_last_layer(): + # Apply final layer norm and compute logits + hidden_states = self.norm(hidden_states) + logits = self.lm_head(hidden_states) + return logits, new_past_key_values + else: + return hidden_states, new_past_key_values From a0476568cac167dedc60c3e4d9aea3b146ff178a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 22:34:11 -0800 Subject: [PATCH 013/589] adding back set_on_download_progress --- exo/inference/pytorch/inference.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 0fe932bc1..d73e3a754 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -219,3 +219,14 @@ async def ensure_shard(self, shard: Optional[Shard]): self.tokenizer = AutoTokenizer.from_pretrained(model_path) self.shard = shard + + def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): + """ + Set a callback function to track download progress. + + Args: + on_download_progress (Callable[[int, int], None]): Callback function to track progress. 
+ """ + # must have this function or inference engine breaks + # This method can be implemented if progress tracking is needed + pass From 36f675e99aa8eda00cf794d9bad54ef45ad4b90c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 22:35:38 -0800 Subject: [PATCH 014/589] fixing model path --- exo/inference/pytorch/inference.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index d73e3a754..8ca34f48f 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -173,10 +173,7 @@ async def ensure_shard(self, shard: Optional[Shard]): if self.shard == shard: return - model_path = Path(self.model_name) - models_dir = Path(__file__).parent / "temp_model_dir" - model_path = models_dir / shard.model_id - + model_path = Path(f".cache/{shard.model_id}") if not model_path.exists(): os.makedirs(model_path, exist_ok=True) From 103a6bc3a934cc85cc4d34e8c9550ec59eee1667 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 22:41:01 -0800 Subject: [PATCH 015/589] trying to fix output changing from numpy to string at end of layer --- exo/inference/pytorch/inference.py | 5 +++-- exo/inference/pytorch/model/llama.py | 14 +++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 8ca34f48f..38ab500ca 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -9,6 +9,7 @@ from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.helpers import download_files +import logging # Default settings TEMPERATURE = 0.7 @@ -19,7 +20,7 @@ class PyTorchDynamicShardInferenceEngine(InferenceEngine): PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. """ - def __init__(self, debug: bool = False): + def __init__(self, debug: bool = True): """ Initialize the inference engine. @@ -70,7 +71,7 @@ async def infer_prompt( new_inference_state = json.dumps({"past_key_values": self._save_kv_cache(past_key_values)}) if self.debug: - print(f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + logging.info(f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") return output_data, new_inference_state, is_eos diff --git a/exo/inference/pytorch/model/llama.py b/exo/inference/pytorch/model/llama.py index 01a694454..b64908444 100644 --- a/exo/inference/pytorch/model/llama.py +++ b/exo/inference/pytorch/model/llama.py @@ -2,6 +2,7 @@ import torch.nn as nn from transformers.models.llama.modeling_llama import LlamaForCausalLM from exo.inference.shard import Shard +from transformers import Cache class ShardedLLAMAModel(nn.Module): """ @@ -41,22 +42,25 @@ def forward(self, input_ids, past_key_values=None): Args: input_ids (torch.Tensor): Input token IDs. - past_key_values (list, optional): List of past key-value states for attention layers. + past_key_values (Cache, optional): Cache object for past key-value states. Returns: tuple: Output logits or hidden states and the new past key-values. 
""" if past_key_values is None: - past_key_values = [None] * len(self.layers) + past_key_values = Cache() # Token and position embeddings hidden_states = self.embed_tokens(input_ids) + self.embed_positions(input_ids) # Apply each layer in this shard - new_past_key_values = [] + new_past_key_values = Cache() for i, layer in enumerate(self.layers): - layer_past = past_key_values[i] - hidden_states, new_layer_past = layer(hidden_states, past_key_values=layer_past) + layer_past = past_key_values[i] if past_key_values else None + hidden_states, new_layer_past = layer( + hidden_states, + past_key_values=layer_past + ) new_past_key_values.append(new_layer_past) if self.shard.is_last_layer(): From b7fd4ed7d9f7148818a24db9bb0cb5aaa88c442b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 22:45:44 -0800 Subject: [PATCH 016/589] using print for debugging for now, fixed using cache for past kvs --- exo/inference/pytorch/inference.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 38ab500ca..0840893a6 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -5,7 +5,7 @@ import numpy as np from pathlib import Path from typing import Optional, Callable, Tuple -from transformers import AutoTokenizer, LlamaForCausalLM +from transformers import AutoTokenizer, LlamaForCausalLM, Cache from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.helpers import download_files @@ -71,7 +71,7 @@ async def infer_prompt( new_inference_state = json.dumps({"past_key_values": self._save_kv_cache(past_key_values)}) if self.debug: - logging.info(f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + print(f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") return output_data, new_inference_state, is_eos @@ -146,11 +146,14 @@ def _load_kv_cache(self, past_key_values_list): past_key_values_list (list): List of past key-value tensors. Returns: - list: List of loaded past key-value tensors. + Cache: Loaded past key-value cache. 
""" if past_key_values_list is None: - return None - return [torch.tensor(kv, device=self.model.device) for kv in past_key_values_list] + return Cache() + cache = Cache() + for kv in past_key_values_list: + cache.append(torch.tensor(kv, device=self.model.device)) + return cache def _save_kv_cache(self, past_key_values): """ From 034bd5af743b315ab246844706e40ad951130d0d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 22:49:25 -0800 Subject: [PATCH 017/589] trying to get some logging out --- exo/inference/pytorch/inference.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 0840893a6..675b65637 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -10,6 +10,8 @@ from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.helpers import download_files import logging +logging.basicConfig() +logging.getLogger("pytorch.inference").setLevel( logging.INFO ) # Default settings TEMPERATURE = 0.7 @@ -71,7 +73,8 @@ async def infer_prompt( new_inference_state = json.dumps({"past_key_values": self._save_kv_cache(past_key_values)}) if self.debug: - print(f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + logging.info( + f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") return output_data, new_inference_state, is_eos From f5b4056adc19e8f6e80c9ca2eb497015c6acdbd1 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 22:50:24 -0800 Subject: [PATCH 018/589] lowering log level --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 675b65637..df6585b25 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -11,7 +11,7 @@ from exo.inference.pytorch.helpers import download_files import logging logging.basicConfig() -logging.getLogger("pytorch.inference").setLevel( logging.INFO ) +logging.getLogger("pytorch.inference").setLevel(logging.DEBUG) # Default settings TEMPERATURE = 0.7 From 58443557e0ad747fac5889675cce465704d0e9c7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 22:55:08 -0800 Subject: [PATCH 019/589] trying to get testing to show output still --- exo/inference/pytorch/test_inference_engine.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 3be152762..d2165baf0 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -2,12 +2,14 @@ import asyncio from exo.inference.shard import Shard from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine +import logging +logging.basicConfig() +logging.getLogger("pytorch.inference.test_engine").setLevel(logging.DEBUG) class TestPyTorchDynamicShardInferenceEngine(unittest.TestCase): @classmethod def setUpClass(cls): - # Create a shard cls.shard = Shard( model_id="llama3-8b-sfr", @@ -23,6 +25,8 @@ def test_infer_prompt(self): # Prepare the prompt prompt = "Why is the sky blue?" 
+ logging.info(f"Testing infer_prompt with prompt {prompt}") + # Run inference loop = asyncio.get_event_loop() output_data, new_inference_state, is_eos = loop.run_until_complete( From d015088e0cf5847ab2d25bac2887f5ee368c8824 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 22:57:39 -0800 Subject: [PATCH 020/589] logging --- exo/inference/pytorch/test_inference_engine.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index d2165baf0..1748c77f0 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -3,8 +3,7 @@ from exo.inference.shard import Shard from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine import logging -logging.basicConfig() -logging.getLogger("pytorch.inference.test_engine").setLevel(logging.DEBUG) + class TestPyTorchDynamicShardInferenceEngine(unittest.TestCase): @@ -56,4 +55,6 @@ def test_infer_prompt(self): # self.assertFalse(is_eos) if __name__ == '__main__': + logging.basicConfig() + logging.getLogger("pytorch.inference.test_engine").setLevel(logging.DEBUG) unittest.main() From f0e51bc91ea873d9d3aff905601ff402fbbb6c97 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 23:09:07 -0800 Subject: [PATCH 021/589] logging --- exo/inference/pytorch/test_inference_engine.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 1748c77f0..aa4aab8e9 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -21,10 +21,12 @@ def setUpClass(cls): cls.engine = PyTorchDynamicShardInferenceEngine(debug=True) def test_infer_prompt(self): + log = logging.getLogger("pytorch.inference.test_engine") + # Prepare the prompt prompt = "Why is the sky blue?" 
- logging.info(f"Testing infer_prompt with prompt {prompt}") + log.info(f"Testing infer_prompt with prompt {prompt}") # Run inference loop = asyncio.get_event_loop() From 4d4362101f98372821eb863ab2b42bffe252bc84 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 23:12:11 -0800 Subject: [PATCH 022/589] logging --- exo/inference/pytorch/inference.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index df6585b25..9601773f6 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -31,6 +31,7 @@ def __init__(self, debug: bool = True): """ self.shard = None self.debug = debug + self.log = logging.getLogger("pytorch.inference") async def infer_prompt( self, @@ -73,7 +74,7 @@ async def infer_prompt( new_inference_state = json.dumps({"past_key_values": self._save_kv_cache(past_key_values)}) if self.debug: - logging.info( + self.log.info( f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") return output_data, new_inference_state, is_eos @@ -119,7 +120,7 @@ async def infer_tensor( new_inference_state = json.dumps({"past_key_values": self._save_kv_cache(past_key_values)}) if self.debug: - print(f"Infer Tensor Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + self.log.info(f"Infer Tensor Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") return output_data, new_inference_state, is_eos From 002d02bd5eb657a22c6592ddabab6174744e2130 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 23:31:22 -0800 Subject: [PATCH 023/589] updating to go through each layer instead of whole model --- exo/inference/pytorch/inference.py | 3 +-- exo/inference/pytorch/model/llama.py | 34 ++++++++-------------------- 2 files changed, 10 insertions(+), 27 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 9601773f6..6328339ca 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,5 +1,4 @@ import os -import shutil import json import torch import numpy as np @@ -10,6 +9,7 @@ from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.helpers import download_files import logging + logging.basicConfig() logging.getLogger("pytorch.inference").setLevel(logging.DEBUG) @@ -232,6 +232,5 @@ def set_on_download_progress(self, on_download_progress: Callable[[int, int], No Args: on_download_progress (Callable[[int, int], None]): Callback function to track progress. """ - # must have this function or inference engine breaks # This method can be implemented if progress tracking is needed pass diff --git a/exo/inference/pytorch/model/llama.py b/exo/inference/pytorch/model/llama.py index b64908444..f6427e025 100644 --- a/exo/inference/pytorch/model/llama.py +++ b/exo/inference/pytorch/model/llama.py @@ -2,26 +2,14 @@ import torch.nn as nn from transformers.models.llama.modeling_llama import LlamaForCausalLM from exo.inference.shard import Shard -from transformers import Cache class ShardedLLAMAModel(nn.Module): - """ - Sharded LLAMA Model for performing inference with a subset of model layers. - """ - def __init__(self, model_path: str, shard: Shard): - """ - Initialize the ShardedLLAMAModel. - - Args: - model_path (str): Path to the pretrained model. - shard (Shard): Shard information indicating which layers to include. 
- """ super(ShardedLLAMAModel, self).__init__() self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard - # Load the full model and move to device + # Load the full model self.full_model = LlamaForCausalLM.from_pretrained(model_path) self.full_model.to(self.device) @@ -36,35 +24,31 @@ def __init__(self, model_path: str, shard: Shard): self.norm = self.full_model.model.norm self.lm_head = self.full_model.lm_head - def forward(self, input_ids, past_key_values=None): + def forward_layers(self, input_ids, past_key_values=None): """ - Perform a forward pass through the model. + Forward pass through the specified layers. Args: input_ids (torch.Tensor): Input token IDs. - past_key_values (Cache, optional): Cache object for past key-value states. + past_key_values (list, optional): Past key values for caching. Returns: - tuple: Output logits or hidden states and the new past key-values. + tuple: Hidden states and new past key values. """ if past_key_values is None: - past_key_values = Cache() + past_key_values = [None] * len(self.layers) # Token and position embeddings hidden_states = self.embed_tokens(input_ids) + self.embed_positions(input_ids) # Apply each layer in this shard - new_past_key_values = Cache() + new_past_key_values = [] for i, layer in enumerate(self.layers): - layer_past = past_key_values[i] if past_key_values else None - hidden_states, new_layer_past = layer( - hidden_states, - past_key_values=layer_past - ) + layer_past = past_key_values[i] + hidden_states, new_layer_past = layer(hidden_states, past_key_values=layer_past, use_cache=True) new_past_key_values.append(new_layer_past) if self.shard.is_last_layer(): - # Apply final layer norm and compute logits hidden_states = self.norm(hidden_states) logits = self.lm_head(hidden_states) return logits, new_past_key_values From 1f5c45ebc525647a77e440b6e8c02fd58bb65495 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 23:41:16 -0800 Subject: [PATCH 024/589] fixing forward pass through specific layers --- exo/inference/pytorch/inference.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 6328339ca..eeb3cc4f0 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,13 +1,16 @@ +# experimental, based off of tinygrad/inference.py + import os import json import torch import numpy as np from pathlib import Path from typing import Optional, Callable, Tuple -from transformers import AutoTokenizer, LlamaForCausalLM, Cache +from transformers import AutoTokenizer, Cache from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.helpers import download_files +from exo.inference.pytorch.model.llama import ShardedLLAMAModel import logging logging.basicConfig() @@ -60,7 +63,7 @@ async def infer_prompt( if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = self.model(input_ids, past_key_values=past_key_values) + output, past_key_values = self.model.forward_layers(input_ids, past_key_values=past_key_values) if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) @@ -106,7 +109,7 @@ async def infer_tensor( if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = 
self.model(input_tensor, past_key_values=past_key_values) + output, past_key_values = self.model.forward_layers(input_tensor, past_key_values=past_key_values) if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) @@ -215,12 +218,10 @@ async def ensure_shard(self, shard: Optional[Shard]): else: raise ValueError(f"Unsupported model: {shard.model_id}") - # Load model and tokenizer from the downloaded files - # This is written for llama model but need to add in option for others - self.model = LlamaForCausalLM.from_pretrained( - model_path, - torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32) + # Load the sharded model + self.model = ShardedLLAMAModel(model_path, shard) + # Load tokenizer self.tokenizer = AutoTokenizer.from_pretrained(model_path) self.shard = shard From 378871620888734653bd5f14e3bb99d7b6897ab8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 23:46:24 -0800 Subject: [PATCH 025/589] adding pytorch data parallel for multi gpu support --- exo/inference/pytorch/inference.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index eeb3cc4f0..4ab0c3607 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -219,7 +219,12 @@ async def ensure_shard(self, shard: Optional[Shard]): raise ValueError(f"Unsupported model: {shard.model_id}") # Load the sharded model - self.model = ShardedLLAMAModel(model_path, shard) + sharded_model = ShardedLLAMAModel(model_path, shard) + + # Use DataParallel for multi-GPU support + self.model = torch.nn.DataParallel(sharded_model) + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.model.to(self.device) # Load tokenizer self.tokenizer = AutoTokenizer.from_pretrained(model_path) From c2be3640f7dcaf512e8327c4ddde9c5abb472ae5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 6 Aug 2024 23:51:42 -0800 Subject: [PATCH 026/589] add in device ids --- exo/inference/pytorch/inference.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 4ab0c3607..f0b565bcb 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -222,7 +222,8 @@ async def ensure_shard(self, shard: Optional[Shard]): sharded_model = ShardedLLAMAModel(model_path, shard) # Use DataParallel for multi-GPU support - self.model = torch.nn.DataParallel(sharded_model) + device_ids = [i for i in range(torch.cuda.device_count())] + self.model = torch.nn.DataParallel(sharded_model, device_ids=device_ids) self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.model.to(self.device) From ad265d8e9c0afc97196de2c621d1db3628fd3ac4 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 00:02:14 -0800 Subject: [PATCH 027/589] adding distributed data parallel for pytorch to fix single gpu utilization issue --- exo/inference/pytorch/inference.py | 41 ++++++++--- .../pytorch/test_inference_engine.py | 71 +++++++++++-------- 2 files changed, 72 insertions(+), 40 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index f0b565bcb..b52285600 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -11,6 +11,9 @@ from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.helpers import download_files from 
exo.inference.pytorch.model.llama import ShardedLLAMAModel +import torch.distributed as dist +import torch.multiprocessing as mp +from torch.nn.parallel import DistributedDataParallel as DDP import logging logging.basicConfig() @@ -25,16 +28,34 @@ class PyTorchDynamicShardInferenceEngine(InferenceEngine): PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. """ - def __init__(self, debug: bool = True): + def __init__(self, debug: bool = True, rank: int = 0, world_size: int = 1): """ Initialize the inference engine. Args: debug (bool): If True, enables debug logging. Defaults to False. + rank (int): Rank of the current process in distributed training. + world_size (int): Total number of processes in distributed training. """ self.shard = None self.debug = debug + self.rank = rank + self.world_size = world_size + self.device = torch.device(f"cuda:{rank}") self.log = logging.getLogger("pytorch.inference") + self.setup_distributed() + + def setup_distributed(self): + """ + Initialize the process group for distributed training. + """ + dist.init_process_group(backend='nccl', init_method='env://', world_size=self.world_size, rank=self.rank) + + def cleanup_distributed(self): + """ + Clean up the process group for distributed training. + """ + dist.destroy_process_group() async def infer_prompt( self, @@ -56,14 +77,14 @@ async def infer_prompt( """ await self.ensure_shard(shard) - input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device) + input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) # Continue the sequence if inference state exists past_key_values = None if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = self.model.forward_layers(input_ids, past_key_values=past_key_values) + output, past_key_values = self.model(input_ids, past_key_values=past_key_values) if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) @@ -102,14 +123,14 @@ async def infer_tensor( """ await self.ensure_shard(shard) - input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) + input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.device) # Continue the sequence if inference state exists past_key_values = None if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = self.model.forward_layers(input_tensor, past_key_values=past_key_values) + output, past_key_values = self.model(input_tensor, past_key_values=past_key_values) if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) @@ -159,7 +180,7 @@ def _load_kv_cache(self, past_key_values_list): return Cache() cache = Cache() for kv in past_key_values_list: - cache.append(torch.tensor(kv, device=self.model.device)) + cache.append(torch.tensor(kv, device=self.device)) return cache def _save_kv_cache(self, past_key_values): @@ -221,11 +242,9 @@ async def ensure_shard(self, shard: Optional[Shard]): # Load the sharded model sharded_model = ShardedLLAMAModel(model_path, shard) - # Use DataParallel for multi-GPU support - device_ids = [i for i in range(torch.cuda.device_count())] - self.model = torch.nn.DataParallel(sharded_model, device_ids=device_ids) + # Use DistributedDataParallel for multi-GPU support + self.model = DDP(sharded_model.to(self.device), device_ids=[self.rank], output_device=self.rank) 
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - self.model.to(self.device) # Load tokenizer self.tokenizer = AutoTokenizer.from_pretrained(model_path) @@ -240,4 +259,4 @@ def set_on_download_progress(self, on_download_progress: Callable[[int, int], No on_download_progress (Callable[[int, int], None]): Callback function to track progress. """ # This method can be implemented if progress tracking is needed - pass + pass \ No newline at end of file diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index aa4aab8e9..0309c5ffe 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -1,14 +1,15 @@ import unittest +import torch import asyncio +import torch.multiprocessing as mp from exo.inference.shard import Shard from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine -import logging - class TestPyTorchDynamicShardInferenceEngine(unittest.TestCase): - @classmethod def setUpClass(cls): + cls.world_size = torch.cuda.device_count() + # Create a shard cls.shard = Shard( model_id="llama3-8b-sfr", @@ -17,46 +18,58 @@ def setUpClass(cls): n_layers=12 ) - # Initialize the inference engine - cls.engine = PyTorchDynamicShardInferenceEngine(debug=True) + def run_engine(rank, world_size, shard, queue): + """ + Run the inference engine in a distributed setting. + """ + # Initialize the engine + engine = PyTorchDynamicShardInferenceEngine(debug=True, rank=rank, world_size=world_size) - def test_infer_prompt(self): - log = logging.getLogger("pytorch.inference.test_engine") + # Run ensure_shard to set up the model + asyncio.run(engine.ensure_shard(shard)) # Prepare the prompt prompt = "Why is the sky blue?" - log.info(f"Testing infer_prompt with prompt {prompt}") - # Run inference - loop = asyncio.get_event_loop() - output_data, new_inference_state, is_eos = loop.run_until_complete( - self.engine.infer_prompt( - request_id="test_request", shard=self.shard, prompt=prompt + output_data, new_inference_state, is_eos = asyncio.run( + engine.infer_prompt( + request_id="test_request", shard=shard, prompt=prompt ) ) + # Put results in the queue to be checked in the test + queue.put((output_data, new_inference_state, is_eos)) + + def test_infer_prompt(self): + """ + Test the inference on a text prompt in a distributed setting. + """ + mp.set_start_method('spawn') + queue = mp.Queue() + + processes = [] + for rank in range(self.world_size): + p = mp.Process(target=self.run_engine, args=(rank, self.world_size, self.shard, queue)) + p.start() + processes.append(p) + + for p in processes: + p.join() + + output_data, new_inference_state, is_eos = queue.get() + # Assertions self.assertIsNotNone(output_data) self.assertIsNotNone(new_inference_state) self.assertFalse(is_eos) - # def test_infer_tensor(self): - # # Prepare the input tensor - # input_ids = self.tokenizer.encode("Hello, world!", return_tensors="pt").numpy() - - # # Run inference - # loop = asyncio.get_event_loop() - # output_data, new_inference_state, is_eos = loop.run_until_complete(self.engine.infer_tensor( - # request_id="test_request", shard=self.shard, input_data=input_ids - # )) - - # # Assertions - # self.assertIsNotNone(output_data) - # self.assertIsNotNone(new_inference_state) - # self.assertFalse(is_eos) + @classmethod + def tearDownClass(cls): + """ + Clean up after the test. 
+ """ + mp.set_start_method('fork', force=True) # Reset the multiprocessing start method to default if __name__ == '__main__': - logging.basicConfig() - logging.getLogger("pytorch.inference.test_engine").setLevel(logging.DEBUG) unittest.main() From 3e21a5d91b6265779bea86166351a67d88763635 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 00:11:15 -0800 Subject: [PATCH 028/589] ddp fixes --- exo/inference/pytorch/inference.py | 7 +- .../pytorch/test_inference_engine.py | 72 ++++++++++++------- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index b52285600..cc4370927 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -49,7 +49,12 @@ def setup_distributed(self): """ Initialize the process group for distributed training. """ - dist.init_process_group(backend='nccl', init_method='env://', world_size=self.world_size, rank=self.rank) + dist.init_process_group( + backend='nccl', + init_method='env://', + world_size=self.world_size, + rank=self.rank + ) def cleanup_distributed(self): """ diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 0309c5ffe..39fdd54b2 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -1,15 +1,58 @@ import unittest import torch -import asyncio import torch.multiprocessing as mp +import torch.distributed as dist +import os +import asyncio from exo.inference.shard import Shard from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine +def setup(rank, world_size): + """ + Set up the distributed environment. + """ + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = '12355' + dist.init_process_group("nccl", rank=rank, world_size=world_size) + +def cleanup(): + """ + Clean up the distributed environment. + """ + dist.destroy_process_group() + +def run_engine(rank, world_size, shard, queue): + """ + Run the inference engine in a distributed setting. + """ + setup(rank, world_size) + + # Initialize the engine + engine = PyTorchDynamicShardInferenceEngine(debug=True) + + # Run ensure_shard to set up the model + asyncio.run(engine.ensure_shard(shard)) + + # Prepare the prompt + prompt = "Why is the sky blue?" + + # Run inference + output_data, new_inference_state, is_eos = asyncio.run( + engine.infer_prompt( + request_id="test_request", shard=shard, prompt=prompt + ) + ) + + # Put results in the queue to be checked in the test + queue.put((output_data, new_inference_state, is_eos)) + + cleanup() + class TestPyTorchDynamicShardInferenceEngine(unittest.TestCase): @classmethod def setUpClass(cls): cls.world_size = torch.cuda.device_count() - + # Create a shard cls.shard = Shard( model_id="llama3-8b-sfr", @@ -18,29 +61,6 @@ def setUpClass(cls): n_layers=12 ) - def run_engine(rank, world_size, shard, queue): - """ - Run the inference engine in a distributed setting. - """ - # Initialize the engine - engine = PyTorchDynamicShardInferenceEngine(debug=True, rank=rank, world_size=world_size) - - # Run ensure_shard to set up the model - asyncio.run(engine.ensure_shard(shard)) - - # Prepare the prompt - prompt = "Why is the sky blue?" 
- - # Run inference - output_data, new_inference_state, is_eos = asyncio.run( - engine.infer_prompt( - request_id="test_request", shard=shard, prompt=prompt - ) - ) - - # Put results in the queue to be checked in the test - queue.put((output_data, new_inference_state, is_eos)) - def test_infer_prompt(self): """ Test the inference on a text prompt in a distributed setting. @@ -50,7 +70,7 @@ def test_infer_prompt(self): processes = [] for rank in range(self.world_size): - p = mp.Process(target=self.run_engine, args=(rank, self.world_size, self.shard, queue)) + p = mp.Process(target=run_engine, args=(rank, self.world_size, self.shard, queue)) p.start() processes.append(p) From 93d838fa34c329997adbea9f0a77d5db75cb1a91 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 00:14:11 -0800 Subject: [PATCH 029/589] update testing --- exo/inference/pytorch/test_inference_engine.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 39fdd54b2..7ac659a9e 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -2,8 +2,9 @@ import torch import torch.multiprocessing as mp import torch.distributed as dist -import os import asyncio +import os +from transformers import AutoTokenizer from exo.inference.shard import Shard from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine @@ -65,7 +66,7 @@ def test_infer_prompt(self): """ Test the inference on a text prompt in a distributed setting. """ - mp.set_start_method('spawn') + mp.set_start_method('spawn', force=True) queue = mp.Queue() processes = [] From 6c0f6af61720d35f47b32c902c1e5f768a334a10 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 00:16:30 -0800 Subject: [PATCH 030/589] fixing init issue --- exo/inference/pytorch/inference.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index cc4370927..7c5ff1268 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -47,20 +47,21 @@ def __init__(self, debug: bool = True, rank: int = 0, world_size: int = 1): def setup_distributed(self): """ - Initialize the process group for distributed training. + Set up the distributed environment. """ - dist.init_process_group( - backend='nccl', - init_method='env://', - world_size=self.world_size, - rank=self.rank - ) + if not dist.is_initialized(): + dist.init_process_group( + backend="nccl" if torch.cuda.is_available() else "gloo", + rank=self.rank, + world_size=self.world_size + ) def cleanup_distributed(self): """ - Clean up the process group for distributed training. + Clean up the distributed environment. 
""" - dist.destroy_process_group() + if dist.is_initialized(): + dist.destroy_process_group() async def infer_prompt( self, From db1786b3d0b28f9749122064c532d81f2e87f524 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 00:38:40 -0800 Subject: [PATCH 031/589] going back to DataParallel, using device map auto for from_pretrained --- exo/inference/pytorch/inference.py | 45 +++++----- .../pytorch/test_inference_engine.py | 86 ++++--------------- 2 files changed, 39 insertions(+), 92 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 7c5ff1268..b7cee23ed 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,19 +1,18 @@ # experimental, based off of tinygrad/inference.py import os +import shutil import json import torch -import numpy as np +import torch.distributed as dist +import torch.multiprocessing as mp +import torch.nn as nn from pathlib import Path from typing import Optional, Callable, Tuple -from transformers import AutoTokenizer, Cache +from transformers import AutoTokenizer, LlamaForCausalLM, Cache from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.helpers import download_files -from exo.inference.pytorch.model.llama import ShardedLLAMAModel -import torch.distributed as dist -import torch.multiprocessing as mp -from torch.nn.parallel import DistributedDataParallel as DDP import logging logging.basicConfig() @@ -28,22 +27,20 @@ class PyTorchDynamicShardInferenceEngine(InferenceEngine): PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. """ - def __init__(self, debug: bool = True, rank: int = 0, world_size: int = 1): + def __init__(self, debug: bool = True): """ Initialize the inference engine. Args: debug (bool): If True, enables debug logging. Defaults to False. - rank (int): Rank of the current process in distributed training. - world_size (int): Total number of processes in distributed training. 
""" self.shard = None self.debug = debug - self.rank = rank - self.world_size = world_size - self.device = torch.device(f"cuda:{rank}") self.log = logging.getLogger("pytorch.inference") - self.setup_distributed() + self.device_ids = list(range(torch.cuda.device_count())) + self.rank = int(os.getenv("RANK", "0")) + self.world_size = int(os.getenv("WORLD_SIZE", "1")) + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") def setup_distributed(self): """ @@ -245,18 +242,23 @@ async def ensure_shard(self, shard: Optional[Shard]): else: raise ValueError(f"Unsupported model: {shard.model_id}") - # Load the sharded model - sharded_model = ShardedLLAMAModel(model_path, shard) - - # Use DistributedDataParallel for multi-GPU support - self.model = DDP(sharded_model.to(self.device), device_ids=[self.rank], output_device=self.rank) - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + # Load model and tokenizer from the downloaded files + # This is written for llama model but need to add in option for others + self.model = LlamaForCausalLM.from_pretrained( + model_path, + torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, + device_map="auto" + ) - # Load tokenizer self.tokenizer = AutoTokenizer.from_pretrained(model_path) + if torch.cuda.device_count() > 1: + self.model = nn.DataParallel(self.model, device_ids=self.device_ids) + + self.model.to(self.device) self.shard = shard + def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): """ Set a callback function to track download progress. @@ -264,5 +266,6 @@ def set_on_download_progress(self, on_download_progress: Callable[[int, int], No Args: on_download_progress (Callable[[int, int], None]): Callback function to track progress. """ + # must have this function or inference engine breaks # This method can be implemented if progress tracking is needed - pass \ No newline at end of file + pass diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 7ac659a9e..d50e6d7d1 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -1,59 +1,13 @@ import unittest -import torch -import torch.multiprocessing as mp -import torch.distributed as dist import asyncio -import os -from transformers import AutoTokenizer from exo.inference.shard import Shard from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine -def setup(rank, world_size): - """ - Set up the distributed environment. - """ - os.environ['MASTER_ADDR'] = 'localhost' - os.environ['MASTER_PORT'] = '12355' - dist.init_process_group("nccl", rank=rank, world_size=world_size) - -def cleanup(): - """ - Clean up the distributed environment. - """ - dist.destroy_process_group() - -def run_engine(rank, world_size, shard, queue): - """ - Run the inference engine in a distributed setting. - """ - setup(rank, world_size) - - # Initialize the engine - engine = PyTorchDynamicShardInferenceEngine(debug=True) - - # Run ensure_shard to set up the model - asyncio.run(engine.ensure_shard(shard)) - - # Prepare the prompt - prompt = "Why is the sky blue?" 
- - # Run inference - output_data, new_inference_state, is_eos = asyncio.run( - engine.infer_prompt( - request_id="test_request", shard=shard, prompt=prompt - ) - ) - - # Put results in the queue to be checked in the test - queue.put((output_data, new_inference_state, is_eos)) - - cleanup() - class TestPyTorchDynamicShardInferenceEngine(unittest.TestCase): + @classmethod def setUpClass(cls): - cls.world_size = torch.cuda.device_count() - + # Create a shard cls.shard = Shard( model_id="llama3-8b-sfr", @@ -62,35 +16,25 @@ def setUpClass(cls): n_layers=12 ) - def test_infer_prompt(self): - """ - Test the inference on a text prompt in a distributed setting. - """ - mp.set_start_method('spawn', force=True) - queue = mp.Queue() - - processes = [] - for rank in range(self.world_size): - p = mp.Process(target=run_engine, args=(rank, self.world_size, self.shard, queue)) - p.start() - processes.append(p) - - for p in processes: - p.join() + # Initialize the inference engine + cls.engine = PyTorchDynamicShardInferenceEngine(debug=True) - output_data, new_inference_state, is_eos = queue.get() + def test_infer_prompt(self): + # Prepare the prompt + prompt = "Why is the sky blue?" + + # Run inference + loop = asyncio.get_event_loop() + output_data, new_inference_state, is_eos = loop.run_until_complete( + self.engine.infer_prompt( + request_id="test_request", shard=self.shard, prompt=prompt + ) + ) # Assertions self.assertIsNotNone(output_data) self.assertIsNotNone(new_inference_state) self.assertFalse(is_eos) - @classmethod - def tearDownClass(cls): - """ - Clean up after the test. - """ - mp.set_start_method('fork', force=True) # Reset the multiprocessing start method to default - if __name__ == '__main__': unittest.main() From 9eacb5a556abd31ddf00677021c1b8b311c4e8d3 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 00:42:02 -0800 Subject: [PATCH 032/589] numpy fix --- exo/inference/pytorch/inference.py | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index b7cee23ed..fc672dc0c 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,12 +1,10 @@ # experimental, based off of tinygrad/inference.py import os -import shutil import json import torch -import torch.distributed as dist -import torch.multiprocessing as mp import torch.nn as nn +import numpy as np from pathlib import Path from typing import Optional, Callable, Tuple from transformers import AutoTokenizer, LlamaForCausalLM, Cache @@ -42,24 +40,6 @@ def __init__(self, debug: bool = True): self.world_size = int(os.getenv("WORLD_SIZE", "1")) self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - def setup_distributed(self): - """ - Set up the distributed environment. - """ - if not dist.is_initialized(): - dist.init_process_group( - backend="nccl" if torch.cuda.is_available() else "gloo", - rank=self.rank, - world_size=self.world_size - ) - - def cleanup_distributed(self): - """ - Clean up the distributed environment. 
- """ - if dist.is_initialized(): - dist.destroy_process_group() - async def infer_prompt( self, request_id: str, From 52eb966883b47459b70509d766d9c823b45eb2a7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 01:06:28 -0800 Subject: [PATCH 033/589] using llama3 and hf repo with token --- exo/inference/pytorch/inference.py | 60 +++++------------------------ exo/inference/pytorch/model/hf.py | 61 ++++++++++++++++++++---------- 2 files changed, 50 insertions(+), 71 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index fc672dc0c..8dadc8df9 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -7,10 +7,10 @@ import numpy as np from pathlib import Path from typing import Optional, Callable, Tuple -from transformers import AutoTokenizer, LlamaForCausalLM, Cache +from transformers import AutoTokenizer, Cache from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine -from exo.inference.pytorch.helpers import download_files +from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel import logging logging.basicConfig() @@ -25,7 +25,7 @@ class PyTorchDynamicShardInferenceEngine(InferenceEngine): PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. """ - def __init__(self, debug: bool = True): + def __init__(self, model_name: str, debug: bool = True): """ Initialize the inference engine. @@ -33,9 +33,10 @@ def __init__(self, debug: bool = True): debug (bool): If True, enables debug logging. Defaults to False. """ self.shard = None + self.model = None + self.model_name = model_name if model_name else "meta-llama/Meta-Llama-3-8B" self.debug = debug self.log = logging.getLogger("pytorch.inference") - self.device_ids = list(range(torch.cuda.device_count())) self.rank = int(os.getenv("RANK", "0")) self.world_size = int(os.getenv("WORLD_SIZE", "1")) self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -188,57 +189,14 @@ async def ensure_shard(self, shard: Optional[Shard]): if self.shard == shard: return - model_path = Path(f".cache/{shard.model_id}") - if not model_path.exists(): - os.makedirs(model_path, exist_ok=True) - - if shard.model_id.lower().find("llama3-8b-sfr") != -1: - num_files = 4 - urls = [ - f"https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/model-{(i+1):05d}-of-{num_files:05d}.safetensors" - for i in range(num_files) - ] - urls.extend([ - "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/config.json", - "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/raw/main/model.safetensors.index.json", - "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/special_tokens_map.json", - "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/tokenizer.json", - "https://huggingface.co/mlx-community/Meta-Llama-3-8B-Instruct/resolve/main/tokenizer_config.json" - ]) - - output_paths = [ - model_path / f"model-{(i+1):05d}-of-{num_files:05d}.safetensors" - for i in range(num_files) - ] - output_paths.extend([ - model_path / "config.json", - model_path / "model.safetensors.index.json", - model_path / "special_tokens_map.json", - model_path / "tokenizer.json", - model_path / "tokenizer_config.json" - ]) - - await download_files(urls, output_paths) - else: - raise ValueError(f"Unsupported model: {shard.model_id}") - # Load model and tokenizer from the downloaded files # This is written for llama 
model but need to add in option for others - self.model = LlamaForCausalLM.from_pretrained( - model_path, - torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, - device_map="auto" - ) - - self.tokenizer = AutoTokenizer.from_pretrained(model_path) - - if torch.cuda.device_count() > 1: - self.model = nn.DataParallel(self.model, device_ids=self.device_ids) - - self.model.to(self.device) + if not self.model: + self.model = ShardedHuggingFaceModel(self.model_name, shard) + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) + self.shard = shard - def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): """ Set a callback function to track download progress. diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index ba838be86..861f259f7 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -12,9 +12,21 @@ def __init__(self, model_name: str, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard + self.device_ids = list(range(torch.cuda.device_count())) # Load the model - self.model = AutoModelForCausalLM.from_pretrained(model_name) + if torch.cuda.device_count() > 1: + self.model = nn.DataParallel(AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, + device_map="auto" + )) + else: + self.model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, + device_map="auto" + ) # Only keep layers corresponding to this shard self.layers = nn.ModuleList([ @@ -23,30 +35,39 @@ def __init__(self, model_name: str, shard: Shard): logging.info(f"layers: {self.layers}") - self.model.transformer.wte.to(self.device) - self.model.transformer.wpe.to(self.device) + # Embeddings and final layer norm + self.embed_tokens = self.full_model.model.embed_tokens + self.embed_positions = self.full_model.model.embed_positions + self.norm = self.full_model.model.norm + self.lm_head = self.full_model.lm_head - def forward(self, input_ids, past_key_values=None): - hidden_states = self._get_initial_hidden_states(input_ids) - hidden_states, new_past_key_values = self._process_layers(hidden_states, past_key_values) + def forward_layers(self, input_ids, past_key_values=None): + """ + Forward pass through the specified layers. - if self.shard.is_last_layer(): - hidden_states = self.model.transformer.ln_f(hidden_states.to(self.device)) - logits = self.model.lm_head(hidden_states) - return logits, new_past_key_values - else: - return hidden_states, new_past_key_values + Args: + input_ids (torch.Tensor): Input token IDs. + past_key_values (list, optional): Past key values for caching. - def _get_initial_hidden_states(self, input_ids): - input_embeds = self.model.transformer.wte(input_ids.to(self.device)) - position_embeds = self.model.transformer.wpe(torch.arange(input_ids.shape[1], device=self.device)) - return input_embeds + position_embeds + Returns: + tuple: Hidden states and new past key values. 
+ """ + if past_key_values is None: + past_key_values = [None] * len(self.layers) - def _process_layers(self, hidden_states, past_key_values): + # Token and position embeddings + hidden_states = self.embed_tokens(input_ids) + self.embed_positions(input_ids) + + # Apply each layer in this shard new_past_key_values = [] for i, layer in enumerate(self.layers): - layer_past = past_key_values[i] if past_key_values else None - hidden_states, new_layer_past = layer(hidden_states, past_key_values=layer_past) + layer_past = past_key_values[i] + hidden_states, new_layer_past = layer(hidden_states, past_key_values=layer_past, use_cache=True) new_past_key_values.append(new_layer_past) - return hidden_states, new_past_key_values + if self.shard.is_last_layer(): + hidden_states = self.norm(hidden_states) + logits = self.lm_head(hidden_states) + return logits, new_past_key_values + else: + return hidden_states, new_past_key_values \ No newline at end of file From e9b931fb0cf214845676fbc0f14044cd11dd0ecb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 01:08:25 -0800 Subject: [PATCH 034/589] fixing test --- exo/inference/pytorch/test_inference_engine.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index d50e6d7d1..96889919b 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -10,14 +10,17 @@ def setUpClass(cls): # Create a shard cls.shard = Shard( - model_id="llama3-8b-sfr", + model_id="meta-llama/Meta-Llama-3-8B", start_layer=0, end_layer=0, n_layers=12 ) # Initialize the inference engine - cls.engine = PyTorchDynamicShardInferenceEngine(debug=True) + cls.engine = PyTorchDynamicShardInferenceEngine( + cls.shard.model_id, + debug=True + ) def test_infer_prompt(self): # Prepare the prompt From 0f3787091abc2e8637ba1c97497a23f2a3178951 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 01:14:34 -0800 Subject: [PATCH 035/589] removing nn distributed for less complexity and just using device_map with accelerate --- exo/inference/pytorch/inference.py | 3 +-- exo/inference/pytorch/model/hf.py | 17 +++++------------ 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 8dadc8df9..f85daf22e 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -5,7 +5,6 @@ import torch import torch.nn as nn import numpy as np -from pathlib import Path from typing import Optional, Callable, Tuple from transformers import AutoTokenizer, Cache from exo.inference.shard import Shard @@ -194,7 +193,7 @@ async def ensure_shard(self, shard: Optional[Shard]): if not self.model: self.model = ShardedHuggingFaceModel(self.model_name, shard) self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) - + self.shard = shard def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 861f259f7..57896e335 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -15,18 +15,11 @@ def __init__(self, model_name: str, shard: Shard): self.device_ids = list(range(torch.cuda.device_count())) # Load the model - if torch.cuda.device_count() > 1: - self.model = nn.DataParallel(AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float16 if 
torch.cuda.is_available() else torch.float32, - device_map="auto" - )) - else: - self.model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, - device_map="auto" - ) + self.model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, + device_map="auto" + ) # Only keep layers corresponding to this shard self.layers = nn.ModuleList([ From 11085bebe7a1a8f3ad17f525737a7855e9b1e40d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 01:16:36 -0800 Subject: [PATCH 036/589] fixing layers call --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 57896e335..f2b9cf581 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -23,7 +23,7 @@ def __init__(self, model_name: str, shard: Shard): # Only keep layers corresponding to this shard self.layers = nn.ModuleList([ - self.model.transformer.h[i] for i in range(shard.start_layer, shard.end_layer + 1) + self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) ]) logging.info(f"layers: {self.layers}") From fca4cd060ec399c65105cbf69345c6009dec2450 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 01:17:51 -0800 Subject: [PATCH 037/589] fixing model call --- exo/inference/pytorch/model/hf.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index f2b9cf581..4fdc36621 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -23,16 +23,16 @@ def __init__(self, model_name: str, shard: Shard): # Only keep layers corresponding to this shard self.layers = nn.ModuleList([ - self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) + self.model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) ]) logging.info(f"layers: {self.layers}") # Embeddings and final layer norm - self.embed_tokens = self.full_model.model.embed_tokens - self.embed_positions = self.full_model.model.embed_positions - self.norm = self.full_model.model.norm - self.lm_head = self.full_model.lm_head + self.embed_tokens = self.model.model.embed_tokens + self.embed_positions = self.model.model.embed_positions + self.norm = self.model.model.norm + self.lm_head = self.model.lm_head def forward_layers(self, input_ids, past_key_values=None): """ From 3dbf14754b5626662f2d0804b33ce56313ccbb53 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 01:24:18 -0800 Subject: [PATCH 038/589] fixing model --- exo/inference/pytorch/model/hf.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 4fdc36621..35885b0d4 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -22,17 +22,14 @@ def __init__(self, model_name: str, shard: Shard): ) # Only keep layers corresponding to this shard - self.layers = nn.ModuleList([ - self.model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) - ]) - + self.layers = self.model.layers logging.info(f"layers: {self.layers}") # Embeddings and final layer norm - self.embed_tokens = self.model.model.embed_tokens - self.embed_positions = self.model.model.embed_positions - 
self.norm = self.model.model.norm - self.lm_head = self.model.lm_head + self.embed_tokens = self.model.embed_tokens + self.embed_positions = self.model.embed_positions + self.norm = self.model.norm + self.lm_head = self.lm_head def forward_layers(self, input_ids, past_key_values=None): """ From 904b3c9c30243819331808852308860f2519dd88 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 01:27:29 -0800 Subject: [PATCH 039/589] fixing get layers --- exo/inference/pytorch/model/hf.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 35885b0d4..a0be18b9b 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -21,8 +21,10 @@ def __init__(self, model_name: str, shard: Shard): device_map="auto" ) - # Only keep layers corresponding to this shard - self.layers = self.model.layers + # Extract only the layers for this shard + self.layers = nn.ModuleList([ + self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) + ]) logging.info(f"layers: {self.layers}") # Embeddings and final layer norm From 61dcf262283b721e4ee15b8e4186344b0bbdf52f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 01:28:22 -0800 Subject: [PATCH 040/589] fixing get layers --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index a0be18b9b..03192c818 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -23,7 +23,7 @@ def __init__(self, model_name: str, shard: Shard): # Extract only the layers for this shard self.layers = nn.ModuleList([ - self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) + self.model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) ]) logging.info(f"layers: {self.layers}") From c7ef7d76cb1d518e56d46d1ece271060def3aa1d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 01:29:32 -0800 Subject: [PATCH 041/589] fixing get layers --- exo/inference/pytorch/model/hf.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 03192c818..448a8f8ec 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -15,7 +15,7 @@ def __init__(self, model_name: str, shard: Shard): self.device_ids = list(range(torch.cuda.device_count())) # Load the model - self.model = AutoModelForCausalLM.from_pretrained( + self.full_model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" @@ -23,15 +23,14 @@ def __init__(self, model_name: str, shard: Shard): # Extract only the layers for this shard self.layers = nn.ModuleList([ - self.model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) + self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) ]) - logging.info(f"layers: {self.layers}") - + # Embeddings and final layer norm - self.embed_tokens = self.model.embed_tokens - self.embed_positions = self.model.embed_positions - self.norm = self.model.norm - self.lm_head = self.lm_head + self.embed_tokens = self.full_model.model.embed_tokens + self.embed_positions = self.full_model.model.embed_positions + self.norm = self.full_model.model.norm + self.lm_head = 
self.full_model.lm_head def forward_layers(self, input_ids, past_key_values=None): """ From f760cba5a62d28c7f1e7dff3f92672ee5d66e764 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 01:45:44 -0800 Subject: [PATCH 042/589] updated sharded hf model --- exo/inference/pytorch/model/hf.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 448a8f8ec..3eeb053a5 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -28,7 +28,6 @@ def __init__(self, model_name: str, shard: Shard): # Embeddings and final layer norm self.embed_tokens = self.full_model.model.embed_tokens - self.embed_positions = self.full_model.model.embed_positions self.norm = self.full_model.model.norm self.lm_head = self.full_model.lm_head @@ -46,8 +45,8 @@ def forward_layers(self, input_ids, past_key_values=None): if past_key_values is None: past_key_values = [None] * len(self.layers) - # Token and position embeddings - hidden_states = self.embed_tokens(input_ids) + self.embed_positions(input_ids) + # Token embeddings + hidden_states = self.embed_tokens(input_ids) # Apply each layer in this shard new_past_key_values = [] From 073f094c7102036e43ede640febf206d6dd44802 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 01:56:11 -0800 Subject: [PATCH 043/589] fix model call --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index f85daf22e..7b72b8eb0 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -67,7 +67,7 @@ async def infer_prompt( if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = self.model(input_ids, past_key_values=past_key_values) + output, past_key_values = self.model.full_model(input_ids, past_key_values=past_key_values) if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) From f54f8b8df6414685e70feb1074b9ae48cd93aed7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 02:00:56 -0800 Subject: [PATCH 044/589] trying tokenizer on cpu --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 7b72b8eb0..9da7a4edb 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -60,7 +60,7 @@ async def infer_prompt( """ await self.ensure_shard(shard) - input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) + input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to("cpu") # Continue the sequence if inference state exists past_key_values = None From c6f0cbbeb14ed10fa9cc50659dc22f7f0bd1ad2a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 02:03:56 -0800 Subject: [PATCH 045/589] testing other models --- exo/inference/pytorch/test_inference_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 96889919b..e2b0eaf6c 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -10,7 +10,7 @@ def setUpClass(cls): # Create a shard cls.shard = Shard( - model_id="meta-llama/Meta-Llama-3-8B", + 
model_id="LLMQ/LLaMA-3-8B-GPTQ-4bit-b128", start_layer=0, end_layer=0, n_layers=12 From 999759b2a315e29f5eb3c869f68388ecd610d9bc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 02:06:46 -0800 Subject: [PATCH 046/589] testing other models --- exo/inference/pytorch/test_inference_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index e2b0eaf6c..2cdf2b153 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -10,7 +10,7 @@ def setUpClass(cls): # Create a shard cls.shard = Shard( - model_id="LLMQ/LLaMA-3-8B-GPTQ-4bit-b128", + model_id="hoang1123/llama3.1-8b-sum-trans-gguf-q4_k_m", start_layer=0, end_layer=0, n_layers=12 From cae9efb9c4f2d556a1d62c2470f095fa04f7ab7b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 02:15:56 -0800 Subject: [PATCH 047/589] testing other models --- exo/inference/pytorch/test_inference_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 2cdf2b153..7fce78d8e 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -10,7 +10,7 @@ def setUpClass(cls): # Create a shard cls.shard = Shard( - model_id="hoang1123/llama3.1-8b-sum-trans-gguf-q4_k_m", + model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, end_layer=0, n_layers=12 From d4387a357ed73408c83a09f61822f703ce495a03 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 02:40:31 -0800 Subject: [PATCH 048/589] taking out unittest --- .../pytorch/test_inference_engine.py | 61 ++++++++----------- 1 file changed, 26 insertions(+), 35 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 7fce78d8e..1279ee1ed 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -1,43 +1,34 @@ -import unittest + import asyncio from exo.inference.shard import Shard from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine -class TestPyTorchDynamicShardInferenceEngine(unittest.TestCase): - - @classmethod - def setUpClass(cls): - - # Create a shard - cls.shard = Shard( - model_id="meta-llama/Meta-Llama-3.1-8B", - start_layer=0, - end_layer=0, - n_layers=12 - ) - - # Initialize the inference engine - cls.engine = PyTorchDynamicShardInferenceEngine( - cls.shard.model_id, - debug=True - ) - - def test_infer_prompt(self): - # Prepare the prompt - prompt = "Why is the sky blue?" - - # Run inference - loop = asyncio.get_event_loop() - output_data, new_inference_state, is_eos = loop.run_until_complete( - self.engine.infer_prompt( - request_id="test_request", shard=self.shard, prompt=prompt - ) +def main(): + shard = Shard( + model_id="meta-llama/Meta-Llama-3.1-8B", + start_layer=0, + end_layer=0, + n_layers=12 + ) + + engine = PyTorchDynamicShardInferenceEngine( + shard.model_id, + debug=True + ) + + + # Prepare the prompt + prompt = "Why is the sky blue?" 
+ + # Run inference + loop = asyncio.get_event_loop() + output_data, new_inference_state, is_eos = loop.run_until_complete( + engine.infer_prompt( + request_id="test_request", shard=shard, prompt=prompt ) + ) - # Assertions - self.assertIsNotNone(output_data) - self.assertIsNotNone(new_inference_state) - self.assertFalse(is_eos) + assert output_data is not None if __name__ == '__main__': - unittest.main() + main() From f5f5b6f0429ef7603341573a02c9cb8cfaf8bb96 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 02:41:52 -0800 Subject: [PATCH 049/589] getting to base model for output --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 9da7a4edb..db88f7926 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -67,7 +67,7 @@ async def infer_prompt( if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = self.model.full_model(input_ids, past_key_values=past_key_values) + output, past_key_values = self.model.full_model.model(input_ids, past_key_values=past_key_values) if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) From 2a8fc820d80d2161c8160e450fdef0eee01863a2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 02:43:49 -0800 Subject: [PATCH 050/589] working on output issue --- exo/inference/pytorch/inference.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index db88f7926..5843a7853 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -69,6 +69,10 @@ async def infer_prompt( output, past_key_values = self.model.full_model.model(input_ids, past_key_values=past_key_values) + if self.debug: + self.log.info( + f"\nInfer Prompt Debug - Request ID: {request_id}\nOutput: {output_data}\nEOS: {self.shard.is_last_layer()}") + if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) next_token = torch.argmax(logits[:, -1, :], dim=-1) @@ -80,9 +84,7 @@ async def infer_prompt( new_inference_state = json.dumps({"past_key_values": self._save_kv_cache(past_key_values)}) - if self.debug: - self.log.info( - f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + return output_data, new_inference_state, is_eos From 9fd5f90274eeb22a7b10e4a03f85906db1e9bf93 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 02:45:07 -0800 Subject: [PATCH 051/589] working on output issue --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 5843a7853..78ebd5c19 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -71,7 +71,7 @@ async def infer_prompt( if self.debug: self.log.info( - f"\nInfer Prompt Debug - Request ID: {request_id}\nOutput: {output_data}\nEOS: {self.shard.is_last_layer()}") + f"\nInfer Prompt Debug - Request ID: {request_id}\nOutput: {output}\nEOS: {self.shard.is_last_layer()}") if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) From de3483db65f45adfbeb93bd0c3bf1806fdfaf0e8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 02:47:36 -0800 
Subject: [PATCH 052/589] working on output issue --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 78ebd5c19..01ab9de16 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -67,7 +67,7 @@ async def infer_prompt( if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = self.model.full_model.model(input_ids, past_key_values=past_key_values) + output, past_key_values = self.model.forward_layers(input_ids, past_key_values=past_key_values) if self.debug: self.log.info( From 7d1598dc2556898c66db277a6e9f5d6a2a681627 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 02:49:19 -0800 Subject: [PATCH 053/589] working on output issue --- exo/inference/pytorch/inference.py | 5 ++++- exo/inference/pytorch/model/hf.py | 11 ++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 01ab9de16..d4e4a3400 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -67,7 +67,10 @@ async def infer_prompt( if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = self.model.forward_layers(input_ids, past_key_values=past_key_values) + output, past_key_values = self.model.forward_layers( + input_ids, + past_key_values=past_key_values + ) if self.debug: self.log.info( diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 3eeb053a5..0f9c85fe3 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -3,7 +3,7 @@ import torch import torch.nn as nn -from transformers import AutoModelForCausalLM +from transformers import AutoModelForCausalLM, Cache from exo.inference.shard import Shard import logging @@ -43,7 +43,7 @@ def forward_layers(self, input_ids, past_key_values=None): tuple: Hidden states and new past key values. 
""" if past_key_values is None: - past_key_values = [None] * len(self.layers) + past_key_values = Cache() # Token embeddings hidden_states = self.embed_tokens(input_ids) @@ -52,7 +52,12 @@ def forward_layers(self, input_ids, past_key_values=None): new_past_key_values = [] for i, layer in enumerate(self.layers): layer_past = past_key_values[i] - hidden_states, new_layer_past = layer(hidden_states, past_key_values=layer_past, use_cache=True) + hidden_states, new_layer_past = layer( + hidden_states, + past_key_values=layer_past, + use_cache=True + ) + new_past_key_values.append(new_layer_past) if self.shard.is_last_layer(): From 6068949f289070f6a6234f14003a81405a219d22 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 03:15:01 -0800 Subject: [PATCH 054/589] updated kv caching to dynamic --- exo/inference/pytorch/inference.py | 48 ++++++++++++++++-------------- exo/inference/pytorch/model/hf.py | 18 ++++------- 2 files changed, 32 insertions(+), 34 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index d4e4a3400..3cff77382 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -6,7 +6,7 @@ import torch.nn as nn import numpy as np from typing import Optional, Callable, Tuple -from transformers import AutoTokenizer, Cache +from transformers import AutoTokenizer, DynamicCache from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel @@ -60,21 +60,14 @@ async def infer_prompt( """ await self.ensure_shard(shard) - input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to("cpu") + input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device) # Continue the sequence if inference state exists past_key_values = None if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = self.model.forward_layers( - input_ids, - past_key_values=past_key_values - ) - - if self.debug: - self.log.info( - f"\nInfer Prompt Debug - Request ID: {request_id}\nOutput: {output}\nEOS: {self.shard.is_last_layer()}") + output, past_key_values = self.model.forward_layers(input_ids, past_key_values=past_key_values) if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) @@ -85,9 +78,10 @@ async def infer_prompt( output_data = output.cpu().numpy() is_eos = False - new_inference_state = json.dumps({"past_key_values": self._save_kv_cache(past_key_values)}) + new_inference_state = json.dumps({"past_key_values": past_key_values.to_legacy_cache()}) - + if self.debug: + self.log.info(f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") return output_data, new_inference_state, is_eos @@ -111,14 +105,14 @@ async def infer_tensor( """ await self.ensure_shard(shard) - input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.device) + input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) # Continue the sequence if inference state exists past_key_values = None if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = self.model(input_tensor, past_key_values=past_key_values) + output, past_key_values = self.model.forward_layers(input_tensor, past_key_values=past_key_values) if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, 
TEMPERATURE, TOP_K) @@ -129,7 +123,7 @@ async def infer_tensor( output_data = output.cpu().numpy() is_eos = False - new_inference_state = json.dumps({"past_key_values": self._save_kv_cache(past_key_values)}) + new_inference_state = json.dumps({"past_key_values": past_key_values.to_legacy_cache()}) if self.debug: self.log.info(f"Infer Tensor Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") @@ -162,13 +156,17 @@ def _load_kv_cache(self, past_key_values_list): past_key_values_list (list): List of past key-value tensors. Returns: - Cache: Loaded past key-value cache. + DynamicCache: Loaded past key-value cache. """ if past_key_values_list is None: - return Cache() - cache = Cache() - for kv in past_key_values_list: - cache.append(torch.tensor(kv, device=self.device)) + return DynamicCache() + + cache = DynamicCache() + for layer_idx, (key_states, value_states) in enumerate(past_key_values_list): + key_states_tensor = torch.tensor(key_states, device=self.device) + value_states_tensor = torch.tensor(value_states, device=self.device) + cache.update(key_states_tensor, value_states_tensor, layer_idx) + return cache def _save_kv_cache(self, past_key_values): @@ -176,12 +174,18 @@ def _save_kv_cache(self, past_key_values): Save key-value cache to the inference state. Args: - past_key_values (list): List of past key-value tensors. + past_key_values (DynamicCache): Cache object containing past key-value tensors. Returns: list: List of key-value tensors in a format suitable for saving. """ - return [kv.cpu().tolist() for kv in past_key_values] + past_key_values_list = [] + for layer_idx in range(len(past_key_values)): + key_states, value_states = past_key_values[layer_idx] + past_key_values_list.append((key_states.cpu().tolist(), value_states.cpu().tolist())) + + return past_key_values_list + async def ensure_shard(self, shard: Optional[Shard]): """ diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 0f9c85fe3..9e4543605 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,18 +1,13 @@ -# Work in progress on a generic hugging face model sharder -# right now doesn't work with all models - import torch import torch.nn as nn -from transformers import AutoModelForCausalLM, Cache +from transformers import AutoModelForCausalLM, DynamicCache from exo.inference.shard import Shard -import logging class ShardedHuggingFaceModel(nn.Module): def __init__(self, model_name: str, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard - self.device_ids = list(range(torch.cuda.device_count())) # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( @@ -37,19 +32,19 @@ def forward_layers(self, input_ids, past_key_values=None): Args: input_ids (torch.Tensor): Input token IDs. - past_key_values (list, optional): Past key values for caching. + past_key_values (DynamicCache, optional): Past key values for caching. Returns: tuple: Hidden states and new past key values. 
""" if past_key_values is None: - past_key_values = Cache() + past_key_values = DynamicCache() # Token embeddings hidden_states = self.embed_tokens(input_ids) # Apply each layer in this shard - new_past_key_values = [] + new_past_key_values = DynamicCache() for i, layer in enumerate(self.layers): layer_past = past_key_values[i] hidden_states, new_layer_past = layer( @@ -57,12 +52,11 @@ def forward_layers(self, input_ids, past_key_values=None): past_key_values=layer_past, use_cache=True ) - - new_past_key_values.append(new_layer_past) + new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) if self.shard.is_last_layer(): hidden_states = self.norm(hidden_states) logits = self.lm_head(hidden_states) return logits, new_past_key_values else: - return hidden_states, new_past_key_values \ No newline at end of file + return hidden_states, new_past_key_values From a9d8d646896b9e079f1a402b032e0da4ea7278c4 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 03:19:11 -0800 Subject: [PATCH 055/589] updated kv caching to dynamic --- exo/inference/pytorch/inference.py | 18 +++++++++--------- exo/inference/pytorch/model/hf.py | 7 ++++++- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 3cff77382..a665a0c6b 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -158,14 +158,12 @@ def _load_kv_cache(self, past_key_values_list): Returns: DynamicCache: Loaded past key-value cache. """ - if past_key_values_list is None: - return DynamicCache() - cache = DynamicCache() - for layer_idx, (key_states, value_states) in enumerate(past_key_values_list): - key_states_tensor = torch.tensor(key_states, device=self.device) - value_states_tensor = torch.tensor(value_states, device=self.device) - cache.update(key_states_tensor, value_states_tensor, layer_idx) + if past_key_values_list is not None: + for layer_idx, (key_states, value_states) in enumerate(past_key_values_list): + key_states_tensor = torch.tensor(key_states, device=self.device) + value_states_tensor = torch.tensor(value_states, device=self.device) + cache.update(key_states_tensor, value_states_tensor, layer_idx) return cache @@ -182,11 +180,13 @@ def _save_kv_cache(self, past_key_values): past_key_values_list = [] for layer_idx in range(len(past_key_values)): key_states, value_states = past_key_values[layer_idx] - past_key_values_list.append((key_states.cpu().tolist(), value_states.cpu().tolist())) + past_key_values_list.append(( + key_states.cpu().tolist(), + value_states.cpu().tolist() + )) return past_key_values_list - async def ensure_shard(self, shard: Optional[Shard]): """ Ensure the model shard is loaded and ready for inference. 
diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 9e4543605..87824aac1 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -46,12 +46,17 @@ def forward_layers(self, input_ids, past_key_values=None): # Apply each layer in this shard new_past_key_values = DynamicCache() for i, layer in enumerate(self.layers): - layer_past = past_key_values[i] + if i < len(past_key_values): + layer_past = past_key_values[i] + else: + layer_past = None + hidden_states, new_layer_past = layer( hidden_states, past_key_values=layer_past, use_cache=True ) + new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) if self.shard.is_last_layer(): From 6dca880b9166de3b4971dd9cc0549580ea2b61bb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 03:27:38 -0800 Subject: [PATCH 056/589] fixing position id error --- exo/inference/pytorch/model/hf.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 87824aac1..63df3cdb0 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -40,8 +40,11 @@ def forward_layers(self, input_ids, past_key_values=None): if past_key_values is None: past_key_values = DynamicCache() - # Token embeddings - hidden_states = self.embed_tokens(input_ids) + # Token and position embeddings + position_ids = torch.arange(0, input_ids.size(1), dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + inputs_embeds = self.embed_tokens(input_ids) + hidden_states = inputs_embeds + self.full_model.model.embed_positions(position_ids) # Apply each layer in this shard new_past_key_values = DynamicCache() @@ -50,13 +53,14 @@ def forward_layers(self, input_ids, past_key_values=None): layer_past = past_key_values[i] else: layer_past = None - + hidden_states, new_layer_past = layer( - hidden_states, - past_key_values=layer_past, - use_cache=True + hidden_states, + past_key_values=layer_past, + use_cache=True, + position_ids=position_ids ) - + new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) if self.shard.is_last_layer(): @@ -64,4 +68,4 @@ def forward_layers(self, input_ids, past_key_values=None): logits = self.lm_head(hidden_states) return logits, new_past_key_values else: - return hidden_states, new_past_key_values + return hidden_states, new_past_key_values \ No newline at end of file From c7df760206dd051093b1e8738a3bcfc9acbde193 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 03:30:06 -0800 Subject: [PATCH 057/589] fixing position id error --- exo/inference/pytorch/model/hf.py | 32 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 63df3cdb0..609f963d1 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -2,12 +2,14 @@ import torch.nn as nn from transformers import AutoModelForCausalLM, DynamicCache from exo.inference.shard import Shard +import logging class ShardedHuggingFaceModel(nn.Module): def __init__(self, model_name: str, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard + self.device_ids = list(range(torch.cuda.device_count())) # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( @@ -40,32 +42,30 
@@ def forward_layers(self, input_ids, past_key_values=None): if past_key_values is None: past_key_values = DynamicCache() - # Token and position embeddings - position_ids = torch.arange(0, input_ids.size(1), dtype=torch.long, device=input_ids.device) - position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + # Token embeddings inputs_embeds = self.embed_tokens(input_ids) + + # Generate position ids if not given + position_ids = torch.arange(0, input_ids.shape[-1], dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + + # Apply positional embeddings hidden_states = inputs_embeds + self.full_model.model.embed_positions(position_ids) # Apply each layer in this shard - new_past_key_values = DynamicCache() + new_past_key_values = [] for i, layer in enumerate(self.layers): - if i < len(past_key_values): - layer_past = past_key_values[i] - else: - layer_past = None - + layer_past = past_key_values[i] if i < len(past_key_values) else None hidden_states, new_layer_past = layer( - hidden_states, - past_key_values=layer_past, - use_cache=True, - position_ids=position_ids + hidden_states, + past_key_values=layer_past, + use_cache=True ) - - new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) + new_past_key_values.append(new_layer_past) if self.shard.is_last_layer(): hidden_states = self.norm(hidden_states) logits = self.lm_head(hidden_states) return logits, new_past_key_values else: - return hidden_states, new_past_key_values \ No newline at end of file + return hidden_states, new_past_key_values From 29470a1af28aea2b082eb88881b0d402a6807372 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 03:37:02 -0800 Subject: [PATCH 058/589] fixing position id error --- exo/inference/pytorch/model/hf.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 609f963d1..fd21f028f 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -2,7 +2,6 @@ import torch.nn as nn from transformers import AutoModelForCausalLM, DynamicCache from exo.inference.shard import Shard -import logging class ShardedHuggingFaceModel(nn.Module): def __init__(self, model_name: str, shard: Shard): @@ -45,21 +44,20 @@ def forward_layers(self, input_ids, past_key_values=None): # Token embeddings inputs_embeds = self.embed_tokens(input_ids) - # Generate position ids if not given + # Generate position ids position_ids = torch.arange(0, input_ids.shape[-1], dtype=torch.long, device=input_ids.device) position_ids = position_ids.unsqueeze(0).expand_as(input_ids) - # Apply positional embeddings - hidden_states = inputs_embeds + self.full_model.model.embed_positions(position_ids) - # Apply each layer in this shard + hidden_states = inputs_embeds new_past_key_values = [] for i, layer in enumerate(self.layers): layer_past = past_key_values[i] if i < len(past_key_values) else None hidden_states, new_layer_past = layer( - hidden_states, - past_key_values=layer_past, - use_cache=True + hidden_states, + past_key_values=layer_past, + use_cache=True, + position_ids=position_ids ) new_past_key_values.append(new_layer_past) From 2b9330ed3c4293fa04d449d2c9ba931babc1a228 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 03:38:08 -0800 Subject: [PATCH 059/589] fixing tensor call --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py 
b/exo/inference/pytorch/inference.py index a665a0c6b..36f1ab4c3 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -120,7 +120,7 @@ async def infer_tensor( output_data = np.array([next_token.item()]) is_eos = next_token.item() == self.tokenizer.eos_token_id else: - output_data = output.cpu().numpy() + output_data = output.detach().numpy() is_eos = False new_inference_state = json.dumps({"past_key_values": past_key_values.to_legacy_cache()}) From a61be5c23a84ad64299414d79db05ff0588c9116 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 03:39:06 -0800 Subject: [PATCH 060/589] fixing tensor call --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 36f1ab4c3..3644113af 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -75,7 +75,7 @@ async def infer_prompt( output_data = np.array([next_token.item()]) is_eos = next_token.item() == self.tokenizer.eos_token_id else: - output_data = output.cpu().numpy() + output_data = output.detach().numpy() is_eos = False new_inference_state = json.dumps({"past_key_values": past_key_values.to_legacy_cache()}) From 8f93296b4b22d3accb816ae197a5b8a2482766c7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 03:41:19 -0800 Subject: [PATCH 061/589] fixing tensor call --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 3644113af..c2b10e216 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -75,7 +75,7 @@ async def infer_prompt( output_data = np.array([next_token.item()]) is_eos = next_token.item() == self.tokenizer.eos_token_id else: - output_data = output.detach().numpy() + output_data = output.cpu().detach().numpy() is_eos = False new_inference_state = json.dumps({"past_key_values": past_key_values.to_legacy_cache()}) @@ -120,7 +120,7 @@ async def infer_tensor( output_data = np.array([next_token.item()]) is_eos = next_token.item() == self.tokenizer.eos_token_id else: - output_data = output.detach().numpy() + output_data = output.cpu().detach().numpy() is_eos = False new_inference_state = json.dumps({"past_key_values": past_key_values.to_legacy_cache()}) From b0287b6673ddbe3d57e633ae5b680086b8abc233 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 04:06:38 -0800 Subject: [PATCH 062/589] fixing cache issue --- exo/inference/pytorch/inference.py | 12 +++++------- exo/inference/pytorch/model/hf.py | 4 ++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index c2b10e216..6f1ec9a75 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -148,7 +148,7 @@ def _apply_generation_settings(self, logits, temperature, top_k): logits = logits.scatter(1, top_k_indices, top_k_values) return logits - def _load_kv_cache(self, past_key_values_list): + def _load_kv_cache(self, past_key_values_list) -> DynamicCache: """ Load key-value cache from the inference state. @@ -158,13 +158,11 @@ def _load_kv_cache(self, past_key_values_list): Returns: DynamicCache: Loaded past key-value cache. 
""" + if past_key_values_list is None: + return DynamicCache() cache = DynamicCache() - if past_key_values_list is not None: - for layer_idx, (key_states, value_states) in enumerate(past_key_values_list): - key_states_tensor = torch.tensor(key_states, device=self.device) - value_states_tensor = torch.tensor(value_states, device=self.device) - cache.update(key_states_tensor, value_states_tensor, layer_idx) - + for layer_idx, (key, value) in enumerate(past_key_values_list): + cache.update(torch.tensor(key, device=self.device), torch.tensor(value, device=self.device), layer_idx) return cache def _save_kv_cache(self, past_key_values): diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index fd21f028f..8f1d79f48 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -50,7 +50,7 @@ def forward_layers(self, input_ids, past_key_values=None): # Apply each layer in this shard hidden_states = inputs_embeds - new_past_key_values = [] + new_past_key_values = DynamicCache() for i, layer in enumerate(self.layers): layer_past = past_key_values[i] if i < len(past_key_values) else None hidden_states, new_layer_past = layer( @@ -59,7 +59,7 @@ def forward_layers(self, input_ids, past_key_values=None): use_cache=True, position_ids=position_ids ) - new_past_key_values.append(new_layer_past) + new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) if self.shard.is_last_layer(): hidden_states = self.norm(hidden_states) From d19ac65d99256541db5031ecc0d13e744516279c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 04:09:43 -0800 Subject: [PATCH 063/589] fixing cache issue --- exo/inference/pytorch/inference.py | 24 +++++++++++++----------- exo/inference/pytorch/model/hf.py | 3 ++- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 6f1ec9a75..b3a12e3e9 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -85,19 +85,19 @@ async def infer_prompt( return output_data, new_inference_state, is_eos - async def infer_tensor( - self, - request_id: str, - shard: Optional[Shard], - input_data: np.ndarray, - inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + async def infer_prompt( + self, + request_id: str, + shard: Optional[Shard], + prompt: str, + inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: """ - Perform inference based on an input tensor. + Perform inference based on a text prompt. Args: request_id (str): Unique identifier for the request. shard (Optional[Shard]): Shard information for the model. - input_data (np.ndarray): The input tensor for inference. + prompt (str): The input text prompt for inference. inference_state (Optional[str]): The previous inference state. 
Returns: @@ -105,14 +105,14 @@ async def infer_tensor( """ await self.ensure_shard(shard) - input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) + input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device) # Continue the sequence if inference state exists past_key_values = None if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = self.model.forward_layers(input_tensor, past_key_values=past_key_values) + output, past_key_values = self.model.forward_layers(input_ids, past_key_values=past_key_values) if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) @@ -126,7 +126,8 @@ async def infer_tensor( new_inference_state = json.dumps({"past_key_values": past_key_values.to_legacy_cache()}) if self.debug: - self.log.info(f"Infer Tensor Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + self.log.info( + f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") return output_data, new_inference_state, is_eos @@ -160,6 +161,7 @@ def _load_kv_cache(self, past_key_values_list) -> DynamicCache: """ if past_key_values_list is None: return DynamicCache() + cache = DynamicCache() for layer_idx, (key, value) in enumerate(past_key_values_list): cache.update(torch.tensor(key, device=self.device), torch.tensor(value, device=self.device), layer_idx) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 8f1d79f48..efb983093 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -59,7 +59,8 @@ def forward_layers(self, input_ids, past_key_values=None): use_cache=True, position_ids=position_ids ) - new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) + if new_layer_past is not None: + new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) if self.shard.is_last_layer(): hidden_states = self.norm(hidden_states) From f19498efb69c141214d6b4bb7a3993b805213227 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 04:11:28 -0800 Subject: [PATCH 064/589] fixing cache issue --- exo/inference/pytorch/inference.py | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index b3a12e3e9..cb359f6a0 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -41,11 +41,11 @@ def __init__(self, model_name: str, debug: bool = True): self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") async def infer_prompt( - self, - request_id: str, - shard: Optional[Shard], - prompt: str, - inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + self, + request_id: str, + shard: Optional[Shard], + prompt: str, + inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: """ Perform inference based on a text prompt. 
@@ -81,23 +81,24 @@ async def infer_prompt( new_inference_state = json.dumps({"past_key_values": past_key_values.to_legacy_cache()}) if self.debug: - self.log.info(f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + self.log.info( + f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") return output_data, new_inference_state, is_eos - async def infer_prompt( - self, - request_id: str, - shard: Optional[Shard], - prompt: str, - inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + async def infer_tensor( + self, + request_id: str, + shard: Optional[Shard], + input_data: np.ndarray, + inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: """ - Perform inference based on a text prompt. + Perform inference based on an input tensor. Args: request_id (str): Unique identifier for the request. shard (Optional[Shard]): Shard information for the model. - prompt (str): The input text prompt for inference. + input_data (np.ndarray): The input tensor for inference. inference_state (Optional[str]): The previous inference state. Returns: @@ -105,14 +106,14 @@ async def infer_prompt( """ await self.ensure_shard(shard) - input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device) + input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) # Continue the sequence if inference state exists past_key_values = None if inference_state: past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - output, past_key_values = self.model.forward_layers(input_ids, past_key_values=past_key_values) + output, past_key_values = self.model.forward_layers(input_tensor, past_key_values=past_key_values) if self.shard.is_last_layer(): logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) @@ -126,8 +127,7 @@ async def infer_prompt( new_inference_state = json.dumps({"past_key_values": past_key_values.to_legacy_cache()}) if self.debug: - self.log.info( - f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + self.log.info(f"Infer Tensor Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") return output_data, new_inference_state, is_eos From 142649cb519a3bbd250142a187e9e760b4bd6547 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 04:12:58 -0800 Subject: [PATCH 065/589] cleaning logging --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index cb359f6a0..62fdc3778 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -82,7 +82,7 @@ async def infer_prompt( if self.debug: self.log.info( - f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + f"\nInfer Prompt Debug - Request ID: {request_id}\nOutput: {output_data}\nEOS: {is_eos}") return output_data, new_inference_state, is_eos From 752ebb4bf06157446e46d00dbdd2eec24d090a57 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 13:57:39 -0800 Subject: [PATCH 066/589] updating device capabilities for other NVIDIA cards in current env --- exo/topology/device_capabilities.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exo/topology/device_capabilities.py b/exo/topology/device_capabilities.py index 02e26d72e..f64066efa 100644 --- a/exo/topology/device_capabilities.py +++ b/exo/topology/device_capabilities.py @@ 
-75,6 +75,7 @@ def to_dict(self): # RTX 30 series "NVIDIA GEFORCE RTX 3050": DeviceFlops(fp32=9.11 * TFLOPS, fp16=18.22 * TFLOPS, int8=36.44 * TFLOPS), "NVIDIA GEFORCE RTX 3060": DeviceFlops(fp32=13.0 * TFLOPS, fp16=26.0 * TFLOPS, int8=52.0 * TFLOPS), + "NVIDIA GEFORCE RTX 3060 LAPTOP GPU": DeviceFlops(fp32=12.7 * TFLOPS, fp16=25.4 * TFLOPS, int8=50.8 * TFLOPS), "NVIDIA GEFORCE RTX 3060 TI": DeviceFlops(fp32=16.2 * TFLOPS, fp16=32.4 * TFLOPS, int8=64.8 * TFLOPS), "NVIDIA GEFORCE RTX 3070": DeviceFlops(fp32=20.3 * TFLOPS, fp16=40.6 * TFLOPS, int8=81.2 * TFLOPS), "NVIDIA GEFORCE RTX 3070 TI": DeviceFlops(fp32=21.8 * TFLOPS, fp16=43.6 * TFLOPS, int8=87.2 * TFLOPS), @@ -91,6 +92,7 @@ def to_dict(self): "NVIDIA A800 80GB PCIE": DeviceFlops(fp32=19.5 * TFLOPS, fp16=312.0 * TFLOPS, int8=624.0 * TFLOPS), "NVIDIA A100 80GB SXM": DeviceFlops(fp32=19.5 * TFLOPS, fp16=312.0 * TFLOPS, int8=624.0 * TFLOPS), "NVIDIA A800 80GB SXM": DeviceFlops(fp32=19.5 * TFLOPS, fp16=312.0 * TFLOPS, int8=624.0 * TFLOPS), + "NVIDIA T1000 8GB": DeviceFlops(fp32=2.5 * TFLOPS, fp16=5.0 * TFLOPS, int8=10.0 * TFLOPS), # ... add more devices if needed ... ### AMD GPUs # RX 6000 series From 3916feceb91ad333946d03fd7c8489515dfd08fc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 14:00:59 -0800 Subject: [PATCH 067/589] adding more low level devices --- exo/topology/device_capabilities.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exo/topology/device_capabilities.py b/exo/topology/device_capabilities.py index f64066efa..519a6f160 100644 --- a/exo/topology/device_capabilities.py +++ b/exo/topology/device_capabilities.py @@ -93,6 +93,8 @@ def to_dict(self): "NVIDIA A100 80GB SXM": DeviceFlops(fp32=19.5 * TFLOPS, fp16=312.0 * TFLOPS, int8=624.0 * TFLOPS), "NVIDIA A800 80GB SXM": DeviceFlops(fp32=19.5 * TFLOPS, fp16=312.0 * TFLOPS, int8=624.0 * TFLOPS), "NVIDIA T1000 8GB": DeviceFlops(fp32=2.5 * TFLOPS, fp16=5.0 * TFLOPS, int8=10.0 * TFLOPS), + "Quadro M2000": DeviceFlops(fp32=0.5 * TFLOPS, fp16=1.0 * TFLOPS, int8=2.0 * TFLOPS), + "Quadro P400": DeviceFlops(fp32=0.641 * TFLOPS, fp16=1.282 * TFLOPS, int8=2.564 * TFLOPS), # ... add more devices if needed ... ### AMD GPUs # RX 6000 series From cd9515f3bdae971439739e1e620b171bec59a509 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 14:16:41 -0800 Subject: [PATCH 068/589] updating pytorch inference, adding pytorch to inference selection --- exo/inference/pytorch/inference.py | 9 +++------ exo/inference/pytorch/model/hf.py | 4 ++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 62fdc3778..97ea56b33 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -24,7 +24,7 @@ class PyTorchDynamicShardInferenceEngine(InferenceEngine): PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. """ - def __init__(self, model_name: str, debug: bool = True): + def __init__(self, debug: bool = True): """ Initialize the inference engine. 
@@ -33,11 +33,8 @@ def __init__(self, model_name: str, debug: bool = True): """ self.shard = None self.model = None - self.model_name = model_name if model_name else "meta-llama/Meta-Llama-3-8B" self.debug = debug self.log = logging.getLogger("pytorch.inference") - self.rank = int(os.getenv("RANK", "0")) - self.world_size = int(os.getenv("WORLD_SIZE", "1")) self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") async def infer_prompt( @@ -200,8 +197,8 @@ async def ensure_shard(self, shard: Optional[Shard]): # Load model and tokenizer from the downloaded files # This is written for llama model but need to add in option for others if not self.model: - self.model = ShardedHuggingFaceModel(self.model_name, shard) - self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) + self.model = ShardedHuggingFaceModel(shard) + self.tokenizer = AutoTokenizer.from_pretrained(shard.model_id) self.shard = shard diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index efb983093..b2bc1b784 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -4,7 +4,7 @@ from exo.inference.shard import Shard class ShardedHuggingFaceModel(nn.Module): - def __init__(self, model_name: str, shard: Shard): + def __init__(self, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard @@ -12,7 +12,7 @@ def __init__(self, model_name: str, shard: Shard): # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( - model_name, + shard.model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" ) From 3534cbcc2d42f9a98bdfe5d0521b2126272f0c20 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 14:17:08 -0800 Subject: [PATCH 069/589] adding pytorch engine to helpers.py --- exo/helpers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exo/helpers.py b/exo/helpers.py index 2b4027a4a..47b4dc95c 100644 --- a/exo/helpers.py +++ b/exo/helpers.py @@ -42,6 +42,9 @@ def get_inference_engine(inference_engine_name): tinygrad.helpers.DEBUG.value = int(os.getenv("TINYGRAD_DEBUG", default="0")) return TinygradDynamicShardInferenceEngine() + elif inference_engine_name == "pytorch": + from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine + return PyTorchDynamicShardInferenceEngine() else: raise ValueError(f"Inference engine {inference_engine_name} not supported") From d0b7e99f2bd273f762b3eb58900cebe8e57af647 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 15:03:04 -0800 Subject: [PATCH 070/589] updating inference_state bug --- exo/api/chatgpt_api.py | 1 + exo/helpers.py | 3 +- exo/inference/pytorch/inference.py | 105 ++++++++++------------------- 3 files changed, 37 insertions(+), 72 deletions(-) diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py index 87390b7f8..761aabc90 100644 --- a/exo/api/chatgpt_api.py +++ b/exo/api/chatgpt_api.py @@ -16,6 +16,7 @@ ### llama "llama-3.1-8b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), + "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, end_layer=0, n_layers=32), }, "llama-3.1-70b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), diff --git a/exo/helpers.py 
b/exo/helpers.py index 47b4dc95c..b811a0f95 100644 --- a/exo/helpers.py +++ b/exo/helpers.py @@ -43,8 +43,9 @@ def get_inference_engine(inference_engine_name): return TinygradDynamicShardInferenceEngine() elif inference_engine_name == "pytorch": + # will change from debug being true after testing from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine - return PyTorchDynamicShardInferenceEngine() + return PyTorchDynamicShardInferenceEngine(debug=os.getenv("PYTORCH_DEBUG", default=True)) else: raise ValueError(f"Inference engine {inference_engine_name} not supported") diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 97ea56b33..ed9af7a7e 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -10,10 +10,6 @@ from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel -import logging - -logging.basicConfig() -logging.getLogger("pytorch.inference").setLevel(logging.DEBUG) # Default settings TEMPERATURE = 0.7 @@ -34,15 +30,15 @@ def __init__(self, debug: bool = True): self.shard = None self.model = None self.debug = debug - self.log = logging.getLogger("pytorch.inference") self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") async def infer_prompt( - self, - request_id: str, - shard: Optional[Shard], - prompt: str, - inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + self, + request_id: str, + shard: Optional[Shard] = None, + prompt: str = "", + image_str: Optional[str] = None, + inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: """ Perform inference based on a text prompt. @@ -50,6 +46,7 @@ async def infer_prompt( request_id (str): Unique identifier for the request. shard (Optional[Shard]): Shard information for the model. prompt (str): The input text prompt for inference. + image_str (Optional[str]): Optional image string for multi-modal models. inference_state (Optional[str]): The previous inference state. 
Returns: @@ -57,37 +54,27 @@ async def infer_prompt( """ await self.ensure_shard(shard) - input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device) - - # Continue the sequence if inference state exists - past_key_values = None - if inference_state: - past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - - output, past_key_values = self.model.forward_layers(input_ids, past_key_values=past_key_values) + toks = self.tokenizer.encode(prompt) + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - if self.shard.is_last_layer(): - logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) - next_token = torch.argmax(logits[:, -1, :], dim=-1) - output_data = np.array([next_token.item()]) - is_eos = next_token.item() == self.tokenizer.eos_token_id - else: - output_data = output.cpu().detach().numpy() - is_eos = False + start_pos = self.model.prefill(self.model, toks[:-1], start_pos=start_pos) + last_tok = toks[-1] - new_inference_state = json.dumps({"past_key_values": past_key_values.to_legacy_cache()}) + output_data = np.array([self.model.forward_layers(torch.tensor([[last_tok]], device=self.model.device), start_pos=start_pos, temperature=TEMPERATURE, top_k=TOP_K).tolist()]) + if output_data.size == 1: + start_pos += 1 - if self.debug: - self.log.info( - f"\nInfer Prompt Debug - Request ID: {request_id}\nOutput: {output_data}\nEOS: {is_eos}") - - return output_data, new_inference_state, is_eos + return ( + output_data, + json.dumps({"start_pos": start_pos}), + output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], + ) async def infer_tensor( self, request_id: str, - shard: Optional[Shard], - input_data: np.ndarray, + shard: Optional[Shard] = None, + input_data: np.ndarray = None, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: """ Perform inference based on an input tensor. 
@@ -103,30 +90,17 @@ async def infer_tensor( """ await self.ensure_shard(shard) - input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) - - # Continue the sequence if inference state exists - past_key_values = None - if inference_state: - past_key_values = self._load_kv_cache(json.loads(inference_state).get("past_key_values")) - - output, past_key_values = self.model.forward_layers(input_tensor, past_key_values=past_key_values) - - if self.shard.is_last_layer(): - logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) - next_token = torch.argmax(logits[:, -1, :], dim=-1) - output_data = np.array([next_token.item()]) - is_eos = next_token.item() == self.tokenizer.eos_token_id - else: - output_data = output.cpu().detach().numpy() - is_eos = False + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - new_inference_state = json.dumps({"past_key_values": past_key_values.to_legacy_cache()}) + output_data = np.array([self.model.forward_layers(torch.tensor([input_data], device=self.model.device), start_pos=start_pos, temperature=TEMPERATURE, top_k=TOP_K).tolist()]) + if output_data.size == 1: + start_pos += 1 - if self.debug: - self.log.info(f"Infer Tensor Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") - - return output_data, new_inference_state, is_eos + return ( + output_data, + json.dumps({"start_pos": start_pos}), + output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], + ) def _apply_generation_settings(self, logits, temperature, top_k): """ @@ -146,7 +120,7 @@ def _apply_generation_settings(self, logits, temperature, top_k): logits = logits.scatter(1, top_k_indices, top_k_values) return logits - def _load_kv_cache(self, past_key_values_list) -> DynamicCache: + def _load_kv_cache(self, past_key_values_list): """ Load key-value cache from the inference state. @@ -158,10 +132,7 @@ def _load_kv_cache(self, past_key_values_list) -> DynamicCache: """ if past_key_values_list is None: return DynamicCache() - - cache = DynamicCache() - for layer_idx, (key, value) in enumerate(past_key_values_list): - cache.update(torch.tensor(key, device=self.device), torch.tensor(value, device=self.device), layer_idx) + cache = DynamicCache.from_legacy_cache(past_key_values_list) return cache def _save_kv_cache(self, past_key_values): @@ -169,20 +140,12 @@ def _save_kv_cache(self, past_key_values): Save key-value cache to the inference state. Args: - past_key_values (DynamicCache): Cache object containing past key-value tensors. + past_key_values (DynamicCache): Past key-value cache. Returns: list: List of key-value tensors in a format suitable for saving. 
""" - past_key_values_list = [] - for layer_idx in range(len(past_key_values)): - key_states, value_states = past_key_values[layer_idx] - past_key_values_list.append(( - key_states.cpu().tolist(), - value_states.cpu().tolist() - )) - - return past_key_values_list + return past_key_values.to_legacy_cache() async def ensure_shard(self, shard: Optional[Shard]): """ From 80ad4d70e6977d2c369b7a8bf70abc185984f50f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 15:07:18 -0800 Subject: [PATCH 071/589] adding prefill --- exo/inference/pytorch/inference.py | 16 ++++++++------ exo/inference/pytorch/model/hf.py | 34 +++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index ed9af7a7e..5d32b8ff0 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -57,16 +57,18 @@ async def infer_prompt( toks = self.tokenizer.encode(prompt) start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - start_pos = self.model.prefill(self.model, toks[:-1], start_pos=start_pos) - last_tok = toks[-1] + hidden_states, past_key_values = self.model.prefill(self.model, torch.tensor(toks[:-1], device=self.model.device), start_pos=start_pos) + last_tok = torch.tensor([toks[-1]], device=self.model.device).unsqueeze(0) + + output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=past_key_values) + output_data = output_data.detach().cpu().numpy() - output_data = np.array([self.model.forward_layers(torch.tensor([[last_tok]], device=self.model.device), start_pos=start_pos, temperature=TEMPERATURE, top_k=TOP_K).tolist()]) if output_data.size == 1: start_pos += 1 return ( output_data, - json.dumps({"start_pos": start_pos}), + json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], ) @@ -92,13 +94,15 @@ async def infer_tensor( start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - output_data = np.array([self.model.forward_layers(torch.tensor([input_data], device=self.model.device), start_pos=start_pos, temperature=TEMPERATURE, top_k=TOP_K).tolist()]) + output_data, past_key_values = self.model.forward_layers(torch.tensor([input_data], device=self.model.device), start_pos=start_pos) + output_data = output_data.detach().cpu().numpy() + if output_data.size == 1: start_pos += 1 return ( output_data, - json.dumps({"start_pos": start_pos}), + json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index b2bc1b784..edae36f72 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -4,15 +4,14 @@ from exo.inference.shard import Shard class ShardedHuggingFaceModel(nn.Module): - def __init__(self, shard: Shard): + def __init__(self, model_name: str, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard - self.device_ids = list(range(torch.cuda.device_count())) # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( - shard.model_id, + model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, 
device_map="auto" ) @@ -27,6 +26,32 @@ def __init__(self, shard: Shard): self.norm = self.full_model.model.norm self.lm_head = self.full_model.lm_head + def prefill(self, model, tokens, start_pos=0): + """ + Process the initial input tokens and set up the initial hidden states and key-value caches. + """ + # Token embeddings + inputs_embeds = self.embed_tokens(tokens) + + # Generate position ids + position_ids = torch.arange(start_pos, start_pos + tokens.shape[-1], dtype=torch.long, device=tokens.device) + position_ids = position_ids.unsqueeze(0).expand_as(tokens) + + # Apply each layer in this shard + hidden_states = inputs_embeds + past_key_values = [] + for i, layer in enumerate(self.layers): + layer_past = None + hidden_states, new_layer_past = layer( + hidden_states, + past_key_values=layer_past, + use_cache=True, + position_ids=position_ids + ) + past_key_values.append(new_layer_past) + + return hidden_states, past_key_values + def forward_layers(self, input_ids, past_key_values=None): """ Forward pass through the specified layers. @@ -59,8 +84,7 @@ def forward_layers(self, input_ids, past_key_values=None): use_cache=True, position_ids=position_ids ) - if new_layer_past is not None: - new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) + new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) if self.shard.is_last_layer(): hidden_states = self.norm(hidden_states) From 2975929d35c963b0e06c815d6b18b4625fa65eef Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 15:10:44 -0800 Subject: [PATCH 072/589] fixing hugging face sharded class --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index edae36f72..3983e00fe 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -4,14 +4,14 @@ from exo.inference.shard import Shard class ShardedHuggingFaceModel(nn.Module): - def __init__(self, model_name: str, shard: Shard): + def __init__(self, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( - model_name, + shard.model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" ) From 2e72367d564f4091dd96242015c2e376bcef7a14 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 15:14:52 -0800 Subject: [PATCH 073/589] fixing tensor shape issue --- exo/inference/pytorch/inference.py | 2 +- exo/inference/pytorch/model/hf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 5d32b8ff0..09de6d223 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -94,7 +94,7 @@ async def infer_tensor( start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - output_data, past_key_values = self.model.forward_layers(torch.tensor([input_data], device=self.model.device), start_pos=start_pos) + output_data, past_key_values = self.model.forward_layers(torch.tensor([input_data], device=self.model.device), past_key_values=past_key_values) output_data = output_data.detach().cpu().numpy() if output_data.size == 1: diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 3983e00fe..1b7e751cd 100644 --- 
a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -71,7 +71,7 @@ def forward_layers(self, input_ids, past_key_values=None): # Generate position ids position_ids = torch.arange(0, input_ids.shape[-1], dtype=torch.long, device=input_ids.device) - position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + position_ids = position_ids.unsqueeze(0).expand(input_ids.shape[0], -1) # Apply each layer in this shard hidden_states = inputs_embeds From 9ed779ab4e050d99ec3ae8ec67b59ad9e006f176 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 15:19:03 -0800 Subject: [PATCH 074/589] fixing tensor shape issue --- exo/api/chatgpt_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py index 761aabc90..65c6d40e2 100644 --- a/exo/api/chatgpt_api.py +++ b/exo/api/chatgpt_api.py @@ -16,7 +16,7 @@ ### llama "llama-3.1-8b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), - "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, end_layer=0, n_layers=32), + "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, end_layer=0, n_layers=12), }, "llama-3.1-70b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), From 86994c352b6a0dc2390fd0d5fc17e93a58721e69 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 17:11:46 -0800 Subject: [PATCH 075/589] fixing tensor shape issue --- exo/inference/pytorch/inference.py | 91 ++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 23 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 09de6d223..647f5ac5e 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -52,25 +52,48 @@ async def infer_prompt( Returns: Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. 
""" + # await self.ensure_shard(shard) + + # toks = self.tokenizer.encode(prompt) + # start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + + # hidden_states, past_key_values = self.model.prefill(self.model, torch.tensor(toks[:-1], device=self.model.device), start_pos=start_pos) + # last_tok = torch.tensor([toks[-1]], device=self.model.device).unsqueeze(0) + + # output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=past_key_values) + # output_data = output_data.detach().cpu().numpy() + + # if output_data.size == 1: + # start_pos += 1 + + # return ( + # output_data, + # json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), + # output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], + # ) await self.ensure_shard(shard) - toks = self.tokenizer.encode(prompt) - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device) + + output = self.model.forward_layers(input_ids) - hidden_states, past_key_values = self.model.prefill(self.model, torch.tensor(toks[:-1], device=self.model.device), start_pos=start_pos) - last_tok = torch.tensor([toks[-1]], device=self.model.device).unsqueeze(0) + if self.shard.is_last_layer(): + logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) + next_token = torch.argmax(logits[:, -1, :], dim=-1) + output_data = np.array([next_token.item()]) + is_eos = next_token.item() == self.tokenizer.eos_token_id + else: + output_data = output.detach().cpu().numpy() + is_eos = False - output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=past_key_values) - output_data = output_data.detach().cpu().numpy() + new_inference_state = json.dumps({"past_key_values": []}) - if output_data.size == 1: - start_pos += 1 + if self.debug: + self.log.info( + f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") - return ( - output_data, - json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), - output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], - ) + return output_data, new_inference_state, is_eos + async def infer_tensor( self, @@ -90,21 +113,43 @@ async def infer_tensor( Returns: Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. 
""" + # await self.ensure_shard(shard) + + # start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + + # output_data, past_key_values = self.model.forward_layers(torch.tensor([input_data], device=self.model.device), past_key_values=past_key_values) + # output_data = output_data.detach().cpu().numpy() + + # if output_data.size == 1: + # start_pos += 1 + + # return ( + # output_data, + # json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), + # output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], + # ) + await self.ensure_shard(shard) - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) + + output = self.model.forward_layers(input_tensor) + + if self.shard.is_last_layer(): + logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) + next_token = torch.argmax(logits[:, -1, :], dim=-1) + output_data = np.array([next_token.item()]) + is_eos = next_token.item() == self.tokenizer.eos_token_id + else: + output_data = output.detach().cpu().numpy() + is_eos = False - output_data, past_key_values = self.model.forward_layers(torch.tensor([input_data], device=self.model.device), past_key_values=past_key_values) - output_data = output_data.detach().cpu().numpy() + new_inference_state = json.dumps({"past_key_values": []}) - if output_data.size == 1: - start_pos += 1 + if self.debug: + self.log.info(f"Infer Tensor Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") - return ( - output_data, - json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), - output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], - ) + return output_data, new_inference_state, is_eos def _apply_generation_settings(self, logits, temperature, top_k): """ From 8b266d81e049ce620271efaa604215499d34dd56 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 17:32:50 -0800 Subject: [PATCH 076/589] fixing tensor shape issue --- exo/inference/pytorch/inference.py | 367 +++++++++++++++++++---------- exo/inference/pytorch/model/hf.py | 132 +++++++++-- 2 files changed, 367 insertions(+), 132 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 647f5ac5e..eb26a74fa 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,12 +1,233 @@ -# experimental, based off of tinygrad/inference.py +# # experimental, based off of tinygrad/inference.py + +# import os +# import json +# import torch +# import torch.nn as nn +# import numpy as np +# from typing import Optional, Callable, Tuple +# from transformers import AutoTokenizer, DynamicCache +# from exo.inference.shard import Shard +# from exo.inference.inference_engine import InferenceEngine +# from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel + +# # Default settings +# TEMPERATURE = 0.7 +# TOP_K = 50 + +# class PyTorchDynamicShardInferenceEngine(InferenceEngine): +# """ +# PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. +# """ + +# def __init__(self, debug: bool = True): +# """ +# Initialize the inference engine. + +# Args: +# debug (bool): If True, enables debug logging. Defaults to False. 
+# """ +# self.shard = None +# self.model = None +# self.debug = debug +# self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# async def infer_prompt( +# self, +# request_id: str, +# shard: Optional[Shard] = None, +# prompt: str = "", +# image_str: Optional[str] = None, +# inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: +# """ +# Perform inference based on a text prompt. + +# Args: +# request_id (str): Unique identifier for the request. +# shard (Optional[Shard]): Shard information for the model. +# prompt (str): The input text prompt for inference. +# image_str (Optional[str]): Optional image string for multi-modal models. +# inference_state (Optional[str]): The previous inference state. + +# Returns: +# Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. +# """ +# # await self.ensure_shard(shard) + +# # toks = self.tokenizer.encode(prompt) +# # start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + +# # hidden_states, past_key_values = self.model.prefill(self.model, torch.tensor(toks[:-1], device=self.model.device), start_pos=start_pos) +# # last_tok = torch.tensor([toks[-1]], device=self.model.device).unsqueeze(0) + +# # output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=past_key_values) +# # output_data = output_data.detach().cpu().numpy() + +# # if output_data.size == 1: +# # start_pos += 1 + +# # return ( +# # output_data, +# # json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), +# # output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], +# # ) +# await self.ensure_shard(shard) + +# start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 +# toks = self.tokenizer.encode(prompt) + +# hidden_states, past_key_values = self.model.prefill(self.model, torch.tensor(toks[:-1], device=self.model.device), start_pos=start_pos) +# last_tok = torch.tensor([toks[-1]], device=self.model.device).unsqueeze(0) + +# output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=past_key_values) +# output_data = output_data.detach().cpu().numpy() + +# if output_data.size == 1: +# start_pos += 1 + +# return ( +# output_data, +# json.dumps({"start_pos": start_pos, "past_key_values": past_key_values}), +# output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], +# ) + + +# async def infer_tensor( +# self, +# request_id: str, +# shard: Optional[Shard] = None, +# input_data: np.ndarray = None, +# inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: +# """ +# Perform inference based on an input tensor. + +# Args: +# request_id (str): Unique identifier for the request. +# shard (Optional[Shard]): Shard information for the model. +# input_data (np.ndarray): The input tensor for inference. +# inference_state (Optional[str]): The previous inference state. + +# Returns: +# Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. 
+# """ +# # await self.ensure_shard(shard) + +# # start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + +# # output_data, past_key_values = self.model.forward_layers(torch.tensor([input_data], device=self.model.device), past_key_values=past_key_values) +# # output_data = output_data.detach().cpu().numpy() + +# # if output_data.size == 1: +# # start_pos += 1 + +# # return ( +# # output_data, +# # json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), +# # output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], +# # ) + +# await self.ensure_shard(shard) + +# input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) + +# output = self.model.forward_layers(input_tensor) + +# if self.shard.is_last_layer(): +# logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) +# next_token = torch.argmax(logits[:, -1, :], dim=-1) +# output_data = np.array([next_token.item()]) +# is_eos = next_token.item() == self.tokenizer.eos_token_id +# else: +# output_data = output.detach().cpu().numpy() +# is_eos = False + +# new_inference_state = json.dumps({"past_key_values": []}) + +# if self.debug: +# self.log.info(f"Infer Tensor Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + +# return output_data, new_inference_state, is_eos + +# def _apply_generation_settings(self, logits, temperature, top_k): +# """ +# Apply temperature and top_k settings to logits. + +# Args: +# logits (torch.Tensor): The logits to be adjusted. +# temperature (float): The temperature setting for generation. +# top_k (int): The top_k setting for generation. + +# Returns: +# torch.Tensor: The adjusted logits. +# """ +# logits = logits / temperature +# if top_k > 0: +# top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) +# logits = logits.scatter(1, top_k_indices, top_k_values) +# return logits + +# def _load_kv_cache(self, past_key_values_list): +# """ +# Load key-value cache from the inference state. + +# Args: +# past_key_values_list (list): List of past key-value tensors. + +# Returns: +# DynamicCache: Loaded past key-value cache. +# """ +# if past_key_values_list is None: +# return DynamicCache() +# cache = DynamicCache.from_legacy_cache(past_key_values_list) +# return cache + +# def _save_kv_cache(self, past_key_values): +# """ +# Save key-value cache to the inference state. + +# Args: +# past_key_values (DynamicCache): Past key-value cache. + +# Returns: +# list: List of key-value tensors in a format suitable for saving. +# """ +# return past_key_values.to_legacy_cache() + +# async def ensure_shard(self, shard: Optional[Shard]): +# """ +# Ensure the model shard is loaded and ready for inference. + +# Args: +# shard (Optional[Shard]): Shard information for the model. +# """ +# if self.shard == shard: +# return + +# # Load model and tokenizer from the downloaded files +# # This is written for llama model but need to add in option for others +# if not self.model: +# self.model = ShardedHuggingFaceModel(shard) +# self.tokenizer = AutoTokenizer.from_pretrained(shard.model_id) + +# self.shard = shard + +# def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): +# """ +# Set a callback function to track download progress. + +# Args: +# on_download_progress (Callable[[int, int], None]): Callback function to track progress. 
+# """ +# # must have this function or inference engine breaks +# # This method can be implemented if progress tracking is needed +# pass + -import os import json import torch -import torch.nn as nn import numpy as np from typing import Optional, Callable, Tuple -from transformers import AutoTokenizer, DynamicCache +from transformers import AutoTokenizer from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel @@ -52,48 +273,25 @@ async def infer_prompt( Returns: Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. """ - # await self.ensure_shard(shard) - - # toks = self.tokenizer.encode(prompt) - # start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - - # hidden_states, past_key_values = self.model.prefill(self.model, torch.tensor(toks[:-1], device=self.model.device), start_pos=start_pos) - # last_tok = torch.tensor([toks[-1]], device=self.model.device).unsqueeze(0) - - # output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=past_key_values) - # output_data = output_data.detach().cpu().numpy() - - # if output_data.size == 1: - # start_pos += 1 - - # return ( - # output_data, - # json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), - # output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], - # ) await self.ensure_shard(shard) - input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device) - - output = self.model.forward_layers(input_ids) - - if self.shard.is_last_layer(): - logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) - next_token = torch.argmax(logits[:, -1, :], dim=-1) - output_data = np.array([next_token.item()]) - is_eos = next_token.item() == self.tokenizer.eos_token_id - else: - output_data = output.detach().cpu().numpy() - is_eos = False + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + toks = self.tokenizer.encode(prompt) + + start_pos = self.model.prefill(torch.tensor(toks[:-1], device=self.model.device), start_pos=start_pos) + last_tok = torch.tensor([toks[-1]], device=self.model.device).unsqueeze(0) - new_inference_state = json.dumps({"past_key_values": []}) + output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=[]) + output_data = output_data.detach().cpu().numpy() - if self.debug: - self.log.info( - f"Infer Prompt Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") + if output_data.size == 1: + start_pos += 1 - return output_data, new_inference_state, is_eos - + return ( + output_data, + json.dumps({"start_pos": start_pos}), + output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], + ) async def infer_tensor( self, @@ -113,88 +311,21 @@ async def infer_tensor( Returns: Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. 
""" - # await self.ensure_shard(shard) - - # start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - - # output_data, past_key_values = self.model.forward_layers(torch.tensor([input_data], device=self.model.device), past_key_values=past_key_values) - # output_data = output_data.detach().cpu().numpy() - - # if output_data.size == 1: - # start_pos += 1 - - # return ( - # output_data, - # json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), - # output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], - # ) - await self.ensure_shard(shard) - input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - output = self.model.forward_layers(input_tensor) + output_data, past_key_values = self.model.forward_layers(torch.tensor([input_data], device=self.model.device), past_key_values=[]) + output_data = output_data.detach().cpu().numpy() - if self.shard.is_last_layer(): - logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) - next_token = torch.argmax(logits[:, -1, :], dim=-1) - output_data = np.array([next_token.item()]) - is_eos = next_token.item() == self.tokenizer.eos_token_id - else: - output_data = output.detach().cpu().numpy() - is_eos = False + if output_data.size == 1: + start_pos += 1 - new_inference_state = json.dumps({"past_key_values": []}) - - if self.debug: - self.log.info(f"Infer Tensor Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") - - return output_data, new_inference_state, is_eos - - def _apply_generation_settings(self, logits, temperature, top_k): - """ - Apply temperature and top_k settings to logits. - - Args: - logits (torch.Tensor): The logits to be adjusted. - temperature (float): The temperature setting for generation. - top_k (int): The top_k setting for generation. - - Returns: - torch.Tensor: The adjusted logits. - """ - logits = logits / temperature - if top_k > 0: - top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) - logits = logits.scatter(1, top_k_indices, top_k_values) - return logits - - def _load_kv_cache(self, past_key_values_list): - """ - Load key-value cache from the inference state. - - Args: - past_key_values_list (list): List of past key-value tensors. - - Returns: - DynamicCache: Loaded past key-value cache. - """ - if past_key_values_list is None: - return DynamicCache() - cache = DynamicCache.from_legacy_cache(past_key_values_list) - return cache - - def _save_kv_cache(self, past_key_values): - """ - Save key-value cache to the inference state. - - Args: - past_key_values (DynamicCache): Past key-value cache. - - Returns: - list: List of key-value tensors in a format suitable for saving. 
- """ - return past_key_values.to_legacy_cache() + return ( + output_data, + json.dumps({"start_pos": start_pos}), + output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], + ) async def ensure_shard(self, shard: Optional[Shard]): """ diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 1b7e751cd..e85ebc522 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,6 +1,101 @@ +# import torch +# import torch.nn as nn +# from transformers import AutoModelForCausalLM, DynamicCache +# from exo.inference.shard import Shard + +# class ShardedHuggingFaceModel(nn.Module): +# def __init__(self, shard: Shard): +# super(ShardedHuggingFaceModel, self).__init__() +# self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +# self.shard = shard + +# # Load the model +# self.full_model = AutoModelForCausalLM.from_pretrained( +# shard.model_id, +# torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, +# device_map="auto" +# ) + +# # Extract only the layers for this shard +# self.layers = nn.ModuleList([ +# self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) +# ]) + +# # Embeddings and final layer norm +# self.embed_tokens = self.full_model.model.embed_tokens +# self.norm = self.full_model.model.norm +# self.lm_head = self.full_model.lm_head + +# def prefill(self, model, tokens, start_pos=0): +# """ +# Process the initial input tokens and set up the initial hidden states and key-value caches. +# """ +# # Token embeddings +# inputs_embeds = self.embed_tokens(tokens) + +# # Generate position ids +# position_ids = torch.arange(start_pos, start_pos + tokens.shape[-1], dtype=torch.long, device=tokens.device) +# position_ids = position_ids.unsqueeze(0).expand_as(tokens) + +# # Apply each layer in this shard +# hidden_states = inputs_embeds +# past_key_values = [] +# for i, layer in enumerate(self.layers): +# layer_past = None +# hidden_states, new_layer_past = layer( +# hidden_states, +# past_key_values=layer_past, +# use_cache=True, +# position_ids=position_ids +# ) +# past_key_values.append(new_layer_past) + +# return hidden_states, past_key_values + +# def forward_layers(self, input_ids, past_key_values=None): +# """ +# Forward pass through the specified layers. + +# Args: +# input_ids (torch.Tensor): Input token IDs. +# past_key_values (DynamicCache, optional): Past key values for caching. + +# Returns: +# tuple: Hidden states and new past key values. 
+# """ +# if past_key_values is None: +# past_key_values = DynamicCache() + +# # Token embeddings +# inputs_embeds = self.embed_tokens(input_ids) + +# # Generate position ids +# position_ids = torch.arange(0, input_ids.shape[-1], dtype=torch.long, device=input_ids.device) +# position_ids = position_ids.unsqueeze(0).expand(input_ids.shape[0], -1) + +# # Apply each layer in this shard +# hidden_states = inputs_embeds +# new_past_key_values = DynamicCache() +# for i, layer in enumerate(self.layers): +# layer_past = past_key_values[i] if i < len(past_key_values) else None +# hidden_states, new_layer_past = layer( +# hidden_states, +# past_key_values=layer_past, +# use_cache=True, +# position_ids=position_ids +# ) +# new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) + +# if self.shard.is_last_layer(): +# hidden_states = self.norm(hidden_states) +# logits = self.lm_head(hidden_states) +# return logits, new_past_key_values +# else: +# return hidden_states, new_past_key_values + import torch import torch.nn as nn -from transformers import AutoModelForCausalLM, DynamicCache +from transformers import AutoModelForCausalLM from exo.inference.shard import Shard class ShardedHuggingFaceModel(nn.Module): @@ -8,6 +103,7 @@ def __init__(self, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard + self.device_ids = list(range(torch.cuda.device_count())) # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( @@ -26,9 +122,16 @@ def __init__(self, shard: Shard): self.norm = self.full_model.model.norm self.lm_head = self.full_model.lm_head - def prefill(self, model, tokens, start_pos=0): + def prefill(self, tokens, start_pos=0): """ Process the initial input tokens and set up the initial hidden states and key-value caches. + + Args: + tokens (torch.Tensor): Input tokens. + start_pos (int, optional): Starting position for position ids. Defaults to 0. + + Returns: + int: The updated start position. """ # Token embeddings inputs_embeds = self.embed_tokens(tokens) @@ -39,18 +142,18 @@ def prefill(self, model, tokens, start_pos=0): # Apply each layer in this shard hidden_states = inputs_embeds - past_key_values = [] - for i, layer in enumerate(self.layers): - layer_past = None - hidden_states, new_layer_past = layer( + for layer in self.layers: + hidden_states, _ = layer( hidden_states, - past_key_values=layer_past, + past_key_values=None, use_cache=True, position_ids=position_ids ) - past_key_values.append(new_layer_past) - return hidden_states, past_key_values + # Update start position + start_pos += tokens.shape[-1] + + return start_pos def forward_layers(self, input_ids, past_key_values=None): """ @@ -58,24 +161,24 @@ def forward_layers(self, input_ids, past_key_values=None): Args: input_ids (torch.Tensor): Input token IDs. - past_key_values (DynamicCache, optional): Past key values for caching. + past_key_values (list, optional): Past key values for caching. Returns: tuple: Hidden states and new past key values. 
""" if past_key_values is None: - past_key_values = DynamicCache() + past_key_values = [] # Token embeddings inputs_embeds = self.embed_tokens(input_ids) # Generate position ids position_ids = torch.arange(0, input_ids.shape[-1], dtype=torch.long, device=input_ids.device) - position_ids = position_ids.unsqueeze(0).expand(input_ids.shape[0], -1) + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) # Apply each layer in this shard hidden_states = inputs_embeds - new_past_key_values = DynamicCache() + new_past_key_values = [] for i, layer in enumerate(self.layers): layer_past = past_key_values[i] if i < len(past_key_values) else None hidden_states, new_layer_past = layer( @@ -84,7 +187,7 @@ def forward_layers(self, input_ids, past_key_values=None): use_cache=True, position_ids=position_ids ) - new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) + new_past_key_values.append(new_layer_past) if self.shard.is_last_layer(): hidden_states = self.norm(hidden_states) @@ -92,3 +195,4 @@ def forward_layers(self, input_ids, past_key_values=None): return logits, new_past_key_values else: return hidden_states, new_past_key_values + From 2be2702f462a7e3fd61597f914cb0d642a19f8e3 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 17:44:33 -0800 Subject: [PATCH 077/589] fixing tensor shape issue --- exo/inference/pytorch/model/hf.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index e85ebc522..ad5d68c3d 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -97,6 +97,7 @@ import torch.nn as nn from transformers import AutoModelForCausalLM from exo.inference.shard import Shard +from exo.helpers import DEBUG class ShardedHuggingFaceModel(nn.Module): def __init__(self, shard: Shard): @@ -133,11 +134,17 @@ def prefill(self, tokens, start_pos=0): Returns: int: The updated start position. 
""" + if DEBUG >=2: + print("\nShardedHuggingFaceModel.prefill called") + # Token embeddings inputs_embeds = self.embed_tokens(tokens) # Generate position ids position_ids = torch.arange(start_pos, start_pos + tokens.shape[-1], dtype=torch.long, device=tokens.device) + + if DEBUG >= 2: + print(f"tokens: {tokens}") position_ids = position_ids.unsqueeze(0).expand_as(tokens) # Apply each layer in this shard From f1943d16c7a2378d79d9b242d63449b405aad192 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 17:48:22 -0800 Subject: [PATCH 078/589] fixing tensor shape issue --- exo/api/chatgpt_api.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py index 65c6d40e2..9ed4a47a4 100644 --- a/exo/api/chatgpt_api.py +++ b/exo/api/chatgpt_api.py @@ -87,21 +87,22 @@ def resolve_tinygrad_tokenizer(model_id: str): async def resolve_tokenizer(model_id: str): - try: - if DEBUG >= 2: print(f"Trying AutoProcessor for {model_id}") - processor = AutoProcessor.from_pretrained(model_id, use_fast=False) - if not hasattr(processor, 'eos_token_id'): - processor.eos_token_id = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).eos_token_id - if not hasattr(processor, 'encode'): - processor.encode = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).encode - if not hasattr(processor, 'decode'): - processor.decode = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).decode - return processor - except Exception as e: - if DEBUG >= 2: print(f"Failed to load processor for {model_id}. Error: {e}") - import traceback + if not model_id == "meta-llama/Meta-Llama-3.1-8B": + try: + if DEBUG >= 2: print(f"Trying AutoProcessor for {model_id}") + processor = AutoProcessor.from_pretrained(model_id, use_fast=False) + if not hasattr(processor, 'eos_token_id'): + processor.eos_token_id = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).eos_token_id + if not hasattr(processor, 'encode'): + processor.encode = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).encode + if not hasattr(processor, 'decode'): + processor.decode = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).decode + return processor + except Exception as e: + if DEBUG >= 2: print(f"Failed to load processor for {model_id}. 
Error: {e}") + import traceback - if DEBUG >= 2: print(traceback.format_exc()) + if DEBUG >= 2: print(traceback.format_exc()) try: if DEBUG >= 2: print(f"Trying AutoTokenizer for {model_id}") From a8d117a4508c0e13276dba3495a19a341d4ea4f9 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 18:05:21 -0800 Subject: [PATCH 079/589] fixing tensor shape issue --- exo/inference/pytorch/model/hf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index ad5d68c3d..157a9da78 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -142,10 +142,7 @@ def prefill(self, tokens, start_pos=0): # Generate position ids position_ids = torch.arange(start_pos, start_pos + tokens.shape[-1], dtype=torch.long, device=tokens.device) - - if DEBUG >= 2: - print(f"tokens: {tokens}") - position_ids = position_ids.unsqueeze(0).expand_as(tokens) + position_ids = position_ids.unsqueeze(0).expand(tokens.shape[0], -1) # Match the shape of tokens # Apply each layer in this shard hidden_states = inputs_embeds @@ -160,6 +157,9 @@ def prefill(self, tokens, start_pos=0): # Update start position start_pos += tokens.shape[-1] + if DEBUG >= 2: + print(f"\nstart_post: {start_pos}\nposition_ids: {position_ids}") + return start_pos def forward_layers(self, input_ids, past_key_values=None): From 2acebf3a96103938278464391462c9bb7cc57f47 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 18:23:41 -0800 Subject: [PATCH 080/589] fixing model not getting right shard, utilizing past key values --- exo/inference/pytorch/inference.py | 277 +++++------------------------ 1 file changed, 41 insertions(+), 236 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index eb26a74fa..b794b225e 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,227 +1,4 @@ -# # experimental, based off of tinygrad/inference.py - -# import os -# import json -# import torch -# import torch.nn as nn -# import numpy as np -# from typing import Optional, Callable, Tuple -# from transformers import AutoTokenizer, DynamicCache -# from exo.inference.shard import Shard -# from exo.inference.inference_engine import InferenceEngine -# from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel - -# # Default settings -# TEMPERATURE = 0.7 -# TOP_K = 50 - -# class PyTorchDynamicShardInferenceEngine(InferenceEngine): -# """ -# PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. -# """ - -# def __init__(self, debug: bool = True): -# """ -# Initialize the inference engine. - -# Args: -# debug (bool): If True, enables debug logging. Defaults to False. -# """ -# self.shard = None -# self.model = None -# self.debug = debug -# self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -# async def infer_prompt( -# self, -# request_id: str, -# shard: Optional[Shard] = None, -# prompt: str = "", -# image_str: Optional[str] = None, -# inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: -# """ -# Perform inference based on a text prompt. - -# Args: -# request_id (str): Unique identifier for the request. -# shard (Optional[Shard]): Shard information for the model. -# prompt (str): The input text prompt for inference. -# image_str (Optional[str]): Optional image string for multi-modal models. -# inference_state (Optional[str]): The previous inference state. 
- -# Returns: -# Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. -# """ -# # await self.ensure_shard(shard) - -# # toks = self.tokenizer.encode(prompt) -# # start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - -# # hidden_states, past_key_values = self.model.prefill(self.model, torch.tensor(toks[:-1], device=self.model.device), start_pos=start_pos) -# # last_tok = torch.tensor([toks[-1]], device=self.model.device).unsqueeze(0) - -# # output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=past_key_values) -# # output_data = output_data.detach().cpu().numpy() - -# # if output_data.size == 1: -# # start_pos += 1 - -# # return ( -# # output_data, -# # json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), -# # output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], -# # ) -# await self.ensure_shard(shard) - -# start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 -# toks = self.tokenizer.encode(prompt) - -# hidden_states, past_key_values = self.model.prefill(self.model, torch.tensor(toks[:-1], device=self.model.device), start_pos=start_pos) -# last_tok = torch.tensor([toks[-1]], device=self.model.device).unsqueeze(0) - -# output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=past_key_values) -# output_data = output_data.detach().cpu().numpy() - -# if output_data.size == 1: -# start_pos += 1 - -# return ( -# output_data, -# json.dumps({"start_pos": start_pos, "past_key_values": past_key_values}), -# output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], -# ) - - -# async def infer_tensor( -# self, -# request_id: str, -# shard: Optional[Shard] = None, -# input_data: np.ndarray = None, -# inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: -# """ -# Perform inference based on an input tensor. - -# Args: -# request_id (str): Unique identifier for the request. -# shard (Optional[Shard]): Shard information for the model. -# input_data (np.ndarray): The input tensor for inference. -# inference_state (Optional[str]): The previous inference state. - -# Returns: -# Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. 
-# """ -# # await self.ensure_shard(shard) - -# # start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - -# # output_data, past_key_values = self.model.forward_layers(torch.tensor([input_data], device=self.model.device), past_key_values=past_key_values) -# # output_data = output_data.detach().cpu().numpy() - -# # if output_data.size == 1: -# # start_pos += 1 - -# # return ( -# # output_data, -# # json.dumps({"start_pos": start_pos, "past_key_values": past_key_values.to_legacy_cache()}), -# # output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], -# # ) - -# await self.ensure_shard(shard) - -# input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) - -# output = self.model.forward_layers(input_tensor) - -# if self.shard.is_last_layer(): -# logits = self._apply_generation_settings(output, TEMPERATURE, TOP_K) -# next_token = torch.argmax(logits[:, -1, :], dim=-1) -# output_data = np.array([next_token.item()]) -# is_eos = next_token.item() == self.tokenizer.eos_token_id -# else: -# output_data = output.detach().cpu().numpy() -# is_eos = False - -# new_inference_state = json.dumps({"past_key_values": []}) - -# if self.debug: -# self.log.info(f"Infer Tensor Debug - Request ID: {request_id}, Output: {output_data}, EOS: {is_eos}") - -# return output_data, new_inference_state, is_eos - -# def _apply_generation_settings(self, logits, temperature, top_k): -# """ -# Apply temperature and top_k settings to logits. - -# Args: -# logits (torch.Tensor): The logits to be adjusted. -# temperature (float): The temperature setting for generation. -# top_k (int): The top_k setting for generation. - -# Returns: -# torch.Tensor: The adjusted logits. -# """ -# logits = logits / temperature -# if top_k > 0: -# top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) -# logits = logits.scatter(1, top_k_indices, top_k_values) -# return logits - -# def _load_kv_cache(self, past_key_values_list): -# """ -# Load key-value cache from the inference state. - -# Args: -# past_key_values_list (list): List of past key-value tensors. - -# Returns: -# DynamicCache: Loaded past key-value cache. -# """ -# if past_key_values_list is None: -# return DynamicCache() -# cache = DynamicCache.from_legacy_cache(past_key_values_list) -# return cache - -# def _save_kv_cache(self, past_key_values): -# """ -# Save key-value cache to the inference state. - -# Args: -# past_key_values (DynamicCache): Past key-value cache. - -# Returns: -# list: List of key-value tensors in a format suitable for saving. -# """ -# return past_key_values.to_legacy_cache() - -# async def ensure_shard(self, shard: Optional[Shard]): -# """ -# Ensure the model shard is loaded and ready for inference. - -# Args: -# shard (Optional[Shard]): Shard information for the model. -# """ -# if self.shard == shard: -# return - -# # Load model and tokenizer from the downloaded files -# # This is written for llama model but need to add in option for others -# if not self.model: -# self.model = ShardedHuggingFaceModel(shard) -# self.tokenizer = AutoTokenizer.from_pretrained(shard.model_id) - -# self.shard = shard - -# def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): -# """ -# Set a callback function to track download progress. - -# Args: -# on_download_progress (Callable[[int, int], None]): Callback function to track progress. 
-# """ -# # must have this function or inference engine breaks -# # This method can be implemented if progress tracking is needed -# pass - +# experimental, based off of tinygrad/inference.py import json import torch @@ -275,13 +52,15 @@ async def infer_prompt( """ await self.ensure_shard(shard) - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 toks = self.tokenizer.encode(prompt) - + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + past_key_values_list = json.loads(inference_state).get("past_key_values", None) if inference_state else None + past_key_values = self._load_kv_cache(past_key_values_list) + start_pos = self.model.prefill(torch.tensor(toks[:-1], device=self.model.device), start_pos=start_pos) last_tok = torch.tensor([toks[-1]], device=self.model.device).unsqueeze(0) - output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=[]) + output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=past_key_values) output_data = output_data.detach().cpu().numpy() if output_data.size == 1: @@ -289,7 +68,7 @@ async def infer_prompt( return ( output_data, - json.dumps({"start_pos": start_pos}), + json.dumps({"start_pos": start_pos, "past_key_values": self._save_kv_cache(past_key_values)}), output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], ) @@ -313,9 +92,13 @@ async def infer_tensor( """ await self.ensure_shard(shard) + input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 + past_key_values_list = json.loads(inference_state).get("past_key_values", None) if inference_state else None + past_key_values = self._load_kv_cache(past_key_values_list) - output_data, past_key_values = self.model.forward_layers(torch.tensor([input_data], device=self.model.device), past_key_values=[]) + output_data, past_key_values = self.model.forward_layers(input_tensor, past_key_values=past_key_values) output_data = output_data.detach().cpu().numpy() if output_data.size == 1: @@ -323,10 +106,36 @@ async def infer_tensor( return ( output_data, - json.dumps({"start_pos": start_pos}), + json.dumps({"start_pos": start_pos, "past_key_values": self._save_kv_cache(past_key_values)}), output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], ) + def _load_kv_cache(self, past_key_values_list): + """ + Load key-value cache from the inference state. + + Args: + past_key_values_list (list): List of past key-value tensors. + + Returns: + list: List of loaded past key-value tensors. + """ + if past_key_values_list is None: + return [] + return [torch.tensor(kv, device=self.device) for kv in past_key_values_list] + + def _save_kv_cache(self, past_key_values): + """ + Save key-value cache to the inference state. + + Args: + past_key_values (list): List of past key-value tensors. + + Returns: + list: List of key-value tensors in a format suitable for saving. + """ + return [kv.cpu().tolist() for kv in past_key_values] + async def ensure_shard(self, shard: Optional[Shard]): """ Ensure the model shard is loaded and ready for inference. 
@@ -337,12 +146,8 @@ async def ensure_shard(self, shard: Optional[Shard]): if self.shard == shard: return - # Load model and tokenizer from the downloaded files - # This is written for llama model but need to add in option for others - if not self.model: - self.model = ShardedHuggingFaceModel(shard) - self.tokenizer = AutoTokenizer.from_pretrained(shard.model_id) - + self.model = ShardedHuggingFaceModel(shard) + self.tokenizer = AutoTokenizer.from_pretrained(shard.model_id) self.shard = shard def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): From 4321c5746ef8e23201bf3d5a5b6555178f28a051 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 18:41:56 -0800 Subject: [PATCH 081/589] fixing return value issue with sendprompt --- exo/inference/pytorch/inference.py | 69 +++++++++----- exo/inference/pytorch/model/hf.py | 142 ++--------------------------- 2 files changed, 52 insertions(+), 159 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index b794b225e..f5d7f7b73 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -18,7 +18,7 @@ class PyTorchDynamicShardInferenceEngine(InferenceEngine): PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. """ - def __init__(self, debug: bool = True): + def __init__(self, debug: bool = False): """ Initialize the inference engine. @@ -27,6 +27,7 @@ def __init__(self, debug: bool = True): """ self.shard = None self.model = None + self.tokenizer = None self.debug = debug self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -52,25 +53,31 @@ async def infer_prompt( """ await self.ensure_shard(shard) + if self.debug: + print(f"[{request_id}] Processing prompt: {prompt[:50]}...") + toks = self.tokenizer.encode(prompt) - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - past_key_values_list = json.loads(inference_state).get("past_key_values", None) if inference_state else None - past_key_values = self._load_kv_cache(past_key_values_list) + state = json.loads(inference_state) if inference_state else {} + start_pos = state.get("start_pos", 0) + past_key_values = self._load_kv_cache(state.get("past_key_values")) - start_pos = self.model.prefill(torch.tensor(toks[:-1], device=self.model.device), start_pos=start_pos) - last_tok = torch.tensor([toks[-1]], device=self.model.device).unsqueeze(0) + start_pos = self.model.prefill(torch.tensor(toks[:-1], device=self.device), start_pos=start_pos) + last_tok = torch.tensor([toks[-1]], device=self.device).unsqueeze(0) output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=past_key_values) output_data = output_data.detach().cpu().numpy() - if output_data.size == 1: - start_pos += 1 + is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] + new_state = { + "start_pos": start_pos + 1, + "past_key_values": self._save_kv_cache(past_key_values) + } + new_inference_state = json.dumps(new_state) + + if self.debug: + print(f"[{request_id}] Output size: {output_data.size}, Is finished: {is_finished}") - return ( - output_data, - json.dumps({"start_pos": start_pos, "past_key_values": self._save_kv_cache(past_key_values)}), - output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], - ) + return output_data, new_inference_state, is_finished async def infer_tensor( self, @@ -92,23 +99,29 @@ async def infer_tensor( """ await 
self.ensure_shard(shard) - input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.model.device) + if self.debug: + print(f"[{request_id}] Processing tensor input, shape: {input_data.shape}") - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 - past_key_values_list = json.loads(inference_state).get("past_key_values", None) if inference_state else None - past_key_values = self._load_kv_cache(past_key_values_list) + input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.device) + + state = json.loads(inference_state) if inference_state else {} + start_pos = state.get("start_pos", 0) + past_key_values = self._load_kv_cache(state.get("past_key_values")) output_data, past_key_values = self.model.forward_layers(input_tensor, past_key_values=past_key_values) output_data = output_data.detach().cpu().numpy() - if output_data.size == 1: - start_pos += 1 + is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] + new_state = { + "start_pos": start_pos + 1, + "past_key_values": self._save_kv_cache(past_key_values) + } + new_inference_state = json.dumps(new_state) + + if self.debug: + print(f"[{request_id}] Output size: {output_data.size}, Is finished: {is_finished}") - return ( - output_data, - json.dumps({"start_pos": start_pos, "past_key_values": self._save_kv_cache(past_key_values)}), - output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id], - ) + return output_data, new_inference_state, is_finished def _load_kv_cache(self, past_key_values_list): """ @@ -134,6 +147,8 @@ def _save_kv_cache(self, past_key_values): Returns: list: List of key-value tensors in a format suitable for saving. """ + if past_key_values is None: + return [] return [kv.cpu().tolist() for kv in past_key_values] async def ensure_shard(self, shard: Optional[Shard]): @@ -146,10 +161,16 @@ async def ensure_shard(self, shard: Optional[Shard]): if self.shard == shard: return + if self.debug: + print(f"Loading new shard: {shard}") + self.model = ShardedHuggingFaceModel(shard) self.tokenizer = AutoTokenizer.from_pretrained(shard.model_id) self.shard = shard + if self.debug: + print(f"Shard loaded successfully: {shard}") + def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): """ Set a callback function to track download progress. 
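Before the hf.py hunk below, a sketch of the layer-slicing idea ShardedHuggingFaceModel relies on: pull a contiguous block of decoder layers out of a LLaMA-style Hugging Face model (model.model.layers, model.model.embed_tokens, model.model.norm, model.lm_head). Editor's illustration only; the model id and layer indices are placeholders.

import torch
from transformers import AutoModelForCausalLM

def load_shard_layers(model_id: str, start_layer: int, end_layer: int):
    full = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)
    # keep only the decoder layers this shard is responsible for
    layers = torch.nn.ModuleList(list(full.model.layers)[start_layer:end_layer + 1])
    return full, layers

# hypothetical usage: full, layers = load_shard_layers("meta-llama/Meta-Llama-3-8B", 0, 15)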
diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 157a9da78..151ff656b 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,110 +1,13 @@ -# import torch -# import torch.nn as nn -# from transformers import AutoModelForCausalLM, DynamicCache -# from exo.inference.shard import Shard - -# class ShardedHuggingFaceModel(nn.Module): -# def __init__(self, shard: Shard): -# super(ShardedHuggingFaceModel, self).__init__() -# self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -# self.shard = shard - -# # Load the model -# self.full_model = AutoModelForCausalLM.from_pretrained( -# shard.model_id, -# torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, -# device_map="auto" -# ) - -# # Extract only the layers for this shard -# self.layers = nn.ModuleList([ -# self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) -# ]) - -# # Embeddings and final layer norm -# self.embed_tokens = self.full_model.model.embed_tokens -# self.norm = self.full_model.model.norm -# self.lm_head = self.full_model.lm_head - -# def prefill(self, model, tokens, start_pos=0): -# """ -# Process the initial input tokens and set up the initial hidden states and key-value caches. -# """ -# # Token embeddings -# inputs_embeds = self.embed_tokens(tokens) - -# # Generate position ids -# position_ids = torch.arange(start_pos, start_pos + tokens.shape[-1], dtype=torch.long, device=tokens.device) -# position_ids = position_ids.unsqueeze(0).expand_as(tokens) - -# # Apply each layer in this shard -# hidden_states = inputs_embeds -# past_key_values = [] -# for i, layer in enumerate(self.layers): -# layer_past = None -# hidden_states, new_layer_past = layer( -# hidden_states, -# past_key_values=layer_past, -# use_cache=True, -# position_ids=position_ids -# ) -# past_key_values.append(new_layer_past) - -# return hidden_states, past_key_values - -# def forward_layers(self, input_ids, past_key_values=None): -# """ -# Forward pass through the specified layers. - -# Args: -# input_ids (torch.Tensor): Input token IDs. -# past_key_values (DynamicCache, optional): Past key values for caching. - -# Returns: -# tuple: Hidden states and new past key values. 
-# """ -# if past_key_values is None: -# past_key_values = DynamicCache() - -# # Token embeddings -# inputs_embeds = self.embed_tokens(input_ids) - -# # Generate position ids -# position_ids = torch.arange(0, input_ids.shape[-1], dtype=torch.long, device=input_ids.device) -# position_ids = position_ids.unsqueeze(0).expand(input_ids.shape[0], -1) - -# # Apply each layer in this shard -# hidden_states = inputs_embeds -# new_past_key_values = DynamicCache() -# for i, layer in enumerate(self.layers): -# layer_past = past_key_values[i] if i < len(past_key_values) else None -# hidden_states, new_layer_past = layer( -# hidden_states, -# past_key_values=layer_past, -# use_cache=True, -# position_ids=position_ids -# ) -# new_past_key_values.update(new_layer_past[0], new_layer_past[1], i) - -# if self.shard.is_last_layer(): -# hidden_states = self.norm(hidden_states) -# logits = self.lm_head(hidden_states) -# return logits, new_past_key_values -# else: -# return hidden_states, new_past_key_values - import torch -import torch.nn as nn from transformers import AutoModelForCausalLM from exo.inference.shard import Shard from exo.helpers import DEBUG -class ShardedHuggingFaceModel(nn.Module): +class ShardedHuggingFaceModel(torch.nn.Module): def __init__(self, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard - self.device_ids = list(range(torch.cuda.device_count())) # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( @@ -114,7 +17,7 @@ def __init__(self, shard: Shard): ) # Extract only the layers for this shard - self.layers = nn.ModuleList([ + self.layers = torch.nn.ModuleList([ self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) ]) @@ -124,25 +27,12 @@ def __init__(self, shard: Shard): self.lm_head = self.full_model.lm_head def prefill(self, tokens, start_pos=0): - """ - Process the initial input tokens and set up the initial hidden states and key-value caches. - - Args: - tokens (torch.Tensor): Input tokens. - start_pos (int, optional): Starting position for position ids. Defaults to 0. - - Returns: - int: The updated start position. - """ - if DEBUG >=2: - print("\nShardedHuggingFaceModel.prefill called") - # Token embeddings inputs_embeds = self.embed_tokens(tokens) # Generate position ids position_ids = torch.arange(start_pos, start_pos + tokens.shape[-1], dtype=torch.long, device=tokens.device) - position_ids = position_ids.unsqueeze(0).expand(tokens.shape[0], -1) # Match the shape of tokens + position_ids = position_ids.unsqueeze(0).expand_as(tokens) # Apply each layer in this shard hidden_states = inputs_embeds @@ -154,27 +44,11 @@ def prefill(self, tokens, start_pos=0): position_ids=position_ids ) - # Update start position - start_pos += tokens.shape[-1] - - if DEBUG >= 2: - print(f"\nstart_post: {start_pos}\nposition_ids: {position_ids}") - - return start_pos + return start_pos + tokens.shape[-1] def forward_layers(self, input_ids, past_key_values=None): - """ - Forward pass through the specified layers. - - Args: - input_ids (torch.Tensor): Input token IDs. - past_key_values (list, optional): Past key values for caching. - - Returns: - tuple: Hidden states and new past key values. 
- """ if past_key_values is None: - past_key_values = [] + past_key_values = [None] * len(self.layers) # Token embeddings inputs_embeds = self.embed_tokens(input_ids) @@ -187,10 +61,9 @@ def forward_layers(self, input_ids, past_key_values=None): hidden_states = inputs_embeds new_past_key_values = [] for i, layer in enumerate(self.layers): - layer_past = past_key_values[i] if i < len(past_key_values) else None hidden_states, new_layer_past = layer( hidden_states, - past_key_values=layer_past, + past_key_values=past_key_values[i], use_cache=True, position_ids=position_ids ) @@ -201,5 +74,4 @@ def forward_layers(self, input_ids, past_key_values=None): logits = self.lm_head(hidden_states) return logits, new_past_key_values else: - return hidden_states, new_past_key_values - + return hidden_states, new_past_key_values \ No newline at end of file From 7ae4856e6422763c6942c7e9addc3210aa6742ac Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 18:57:13 -0800 Subject: [PATCH 082/589] fixing return value issue with sendprompt --- exo/inference/pytorch/inference.py | 14 +++++++------- exo/inference/pytorch/model/hf.py | 9 +++++++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index f5d7f7b73..a3ed7654d 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -8,6 +8,7 @@ from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel +from exo.helpers import DEBUG # Default settings TEMPERATURE = 0.7 @@ -28,7 +29,6 @@ def __init__(self, debug: bool = False): self.shard = None self.model = None self.tokenizer = None - self.debug = debug self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") async def infer_prompt( @@ -53,7 +53,7 @@ async def infer_prompt( """ await self.ensure_shard(shard) - if self.debug: + if DEBUG >= 2: print(f"[{request_id}] Processing prompt: {prompt[:50]}...") toks = self.tokenizer.encode(prompt) @@ -74,7 +74,7 @@ async def infer_prompt( } new_inference_state = json.dumps(new_state) - if self.debug: + if DEBUG >= 2: print(f"[{request_id}] Output size: {output_data.size}, Is finished: {is_finished}") return output_data, new_inference_state, is_finished @@ -99,7 +99,7 @@ async def infer_tensor( """ await self.ensure_shard(shard) - if self.debug: + if DEBUG >= 2: print(f"[{request_id}] Processing tensor input, shape: {input_data.shape}") input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.device) @@ -118,7 +118,7 @@ async def infer_tensor( } new_inference_state = json.dumps(new_state) - if self.debug: + if DEBUG >= 2: print(f"[{request_id}] Output size: {output_data.size}, Is finished: {is_finished}") return output_data, new_inference_state, is_finished @@ -161,14 +161,14 @@ async def ensure_shard(self, shard: Optional[Shard]): if self.shard == shard: return - if self.debug: + if DEBUG >= 2: print(f"Loading new shard: {shard}") self.model = ShardedHuggingFaceModel(shard) self.tokenizer = AutoTokenizer.from_pretrained(shard.model_id) self.shard = shard - if self.debug: + if DEBUG >= 2: print(f"Shard loaded successfully: {shard}") def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 151ff656b..c9067da98 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -2,10 
+2,15 @@ from transformers import AutoModelForCausalLM from exo.inference.shard import Shard from exo.helpers import DEBUG +from typing import Tuple class ShardedHuggingFaceModel(torch.nn.Module): def __init__(self, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() + + if DEBUG >= 2: + print(f"ShardedHuggingFaceModel init with shard {shard}") + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard @@ -26,7 +31,7 @@ def __init__(self, shard: Shard): self.norm = self.full_model.model.norm self.lm_head = self.full_model.lm_head - def prefill(self, tokens, start_pos=0): + def prefill(self, tokens, start_pos=0) -> int: # Token embeddings inputs_embeds = self.embed_tokens(tokens) @@ -46,7 +51,7 @@ def prefill(self, tokens, start_pos=0): return start_pos + tokens.shape[-1] - def forward_layers(self, input_ids, past_key_values=None): + def forward_layers(self, input_ids, past_key_values=None) -> Tuple[any, list]: if past_key_values is None: past_key_values = [None] * len(self.layers) From 77a4403eccb43b1f8b1fead8f200b4b8ee62c517 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 19:43:39 -0800 Subject: [PATCH 083/589] fixing prefill stuff and infer_prompt --- exo/inference/pytorch/inference.py | 25 +++++++---- exo/inference/pytorch/model/hf.py | 68 ++++++++++++++++++------------ 2 files changed, 58 insertions(+), 35 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index a3ed7654d..f8468ac87 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -51,25 +51,33 @@ async def infer_prompt( Returns: Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. """ - await self.ensure_shard(shard) - + async def infer_prompt( + self, + request_id: str, + shard: Optional[Shard] = None, + prompt: str = "", + image_str: Optional[str] = None, + inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + if DEBUG >= 2: print(f"[{request_id}] Processing prompt: {prompt[:50]}...") + await self.ensure_shard(shard) + toks = self.tokenizer.encode(prompt) state = json.loads(inference_state) if inference_state else {} start_pos = state.get("start_pos", 0) past_key_values = self._load_kv_cache(state.get("past_key_values")) - start_pos = self.model.prefill(torch.tensor(toks[:-1], device=self.device), start_pos=start_pos) - last_tok = torch.tensor([toks[-1]], device=self.device).unsqueeze(0) - - output_data, past_key_values = self.model.forward_layers(last_tok, past_key_values=past_key_values) + start_pos = self.model.prefill( + torch.tensor(toks[:-1], device=self.device), start_pos=start_pos) + + output_data, past_key_values = self.model.forward_layers(toks[:, -1:], past_key_values=past_key_values) output_data = output_data.detach().cpu().numpy() - is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] + is_finished = output_data.shape[1] == 1 and output_data[0, 0, -1] == self.tokenizer.eos_token_id new_state = { - "start_pos": start_pos + 1, + "start_pos": start_pos, "past_key_values": self._save_kv_cache(past_key_values) } new_inference_state = json.dumps(new_state) @@ -79,6 +87,7 @@ async def infer_prompt( return output_data, new_inference_state, is_finished + async def infer_tensor( self, request_id: str, diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index c9067da98..803af7518 100644 --- a/exo/inference/pytorch/model/hf.py +++ 
b/exo/inference/pytorch/model/hf.py @@ -31,52 +31,66 @@ def __init__(self, shard: Shard): self.norm = self.full_model.model.norm self.lm_head = self.full_model.lm_head - def prefill(self, tokens, start_pos=0) -> int: - # Token embeddings - inputs_embeds = self.embed_tokens(tokens) + def prefill(self, tokens: torch.tensor, start_pos: int=0) -> int: + """ + Process the initial input tokens and set up the initial hidden states. + """ + # Assuming tokens is a 1D tensor of token IDs + for token in tokens: + # Convert token to a tensor and get embeddings + token_tensor = torch.tensor([[token]], device=self.device) + inputs_embeds = self.embed_tokens(token_tensor) - # Generate position ids - position_ids = torch.arange(start_pos, start_pos + tokens.shape[-1], dtype=torch.long, device=tokens.device) - position_ids = position_ids.unsqueeze(0).expand_as(tokens) + # Prefill with tokens + for layer in self.layers: + _ = layer( + inputs_embeds, + use_cache=True, + output_attentions=False, + ) + # Update embeddings with layer output + inputs_embeds = layer_outputs[0] - # Apply each layer in this shard - hidden_states = inputs_embeds - for layer in self.layers: - hidden_states, _ = layer( - hidden_states, - past_key_values=None, - use_cache=True, - position_ids=position_ids - ) - - return start_pos + tokens.shape[-1] + # Increment start position + start_pos += 1 + + return start_pos - def forward_layers(self, input_ids, past_key_values=None) -> Tuple[any, list]: + def forward_layers(self, input_ids, past_key_values=None): + """ + Forward pass through the specified layers. + """ if past_key_values is None: past_key_values = [None] * len(self.layers) # Token embeddings - inputs_embeds = self.embed_tokens(input_ids) + hidden_states = self.embed_tokens(input_ids) # Generate position ids - position_ids = torch.arange(0, input_ids.shape[-1], dtype=torch.long, device=input_ids.device) - position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + seq_length = input_ids.shape[1] + position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).expand(input_ids.shape) # Apply each layer in this shard - hidden_states = inputs_embeds new_past_key_values = [] for i, layer in enumerate(self.layers): - hidden_states, new_layer_past = layer( + layer_outputs = layer( hidden_states, - past_key_values=past_key_values[i], + attention_mask=None, + position_ids=position_ids, + past_key_value=past_key_values[i], use_cache=True, - position_ids=position_ids + output_attentions=False, ) - new_past_key_values.append(new_layer_past) + hidden_states = layer_outputs[0] + new_past_key_values.append(layer_outputs[1]) if self.shard.is_last_layer(): hidden_states = self.norm(hidden_states) logits = self.lm_head(hidden_states) return logits, new_past_key_values else: - return hidden_states, new_past_key_values \ No newline at end of file + return hidden_states, new_past_key_values + + def is_last_layer(self): + return self.shard.is_last_layer() \ No newline at end of file From dd23891273523411306999036c71a23169e197b1 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 20:42:17 -0800 Subject: [PATCH 084/589] fixing none for shape call --- exo/inference/pytorch/model/hf.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 803af7518..4917f23ee 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -39,18 +39,15 @@ def prefill(self, 
tokens: torch.tensor, start_pos: int=0) -> int: for token in tokens: # Convert token to a tensor and get embeddings token_tensor = torch.tensor([[token]], device=self.device) - inputs_embeds = self.embed_tokens(token_tensor) # Prefill with tokens for layer in self.layers: _ = layer( - inputs_embeds, + token_tensor, use_cache=True, output_attentions=False, ) - # Update embeddings with layer output - inputs_embeds = layer_outputs[0] - + # Increment start position start_pos += 1 From 82347462f5f537e7c06eb48c3cbf547b986e967b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 20:44:46 -0800 Subject: [PATCH 085/589] updating test --- exo/inference/pytorch/test_inference_engine.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 1279ee1ed..fbc314f08 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -12,8 +12,7 @@ def main(): ) engine = PyTorchDynamicShardInferenceEngine( - shard.model_id, - debug=True + shard ) From e603902fdf20ac3001778d04f1b45b323d5dcd96 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 20:50:15 -0800 Subject: [PATCH 086/589] updating hf --- exo/inference/pytorch/model/hf.py | 36 ++++++++++++------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 4917f23ee..d87235fb7 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -35,22 +35,30 @@ def prefill(self, tokens: torch.tensor, start_pos: int=0) -> int: """ Process the initial input tokens and set up the initial hidden states. """ - # Assuming tokens is a 1D tensor of token IDs + # Assuming tokens is a 1D tensor of token IDs for token in tokens: # Convert token to a tensor and get embeddings token_tensor = torch.tensor([[token]], device=self.device) + inputs_embeds = self.embed_tokens(token_tensor) + + if DEBUG >= 2: + print(f"Initial input embeddings shape: {inputs_embeds.shape}") # Prefill with tokens for layer in self.layers: - _ = layer( - token_tensor, + layer_outputs = layer( + inputs_embeds, use_cache=True, output_attentions=False, ) - + inputs_embeds = layer_outputs[0] + + if DEBUG >= 2: + print(f"Layer output shape: {inputs_embeds.shape}") + # Increment start position start_pos += 1 - + return start_pos def forward_layers(self, input_ids, past_key_values=None): @@ -62,19 +70,11 @@ def forward_layers(self, input_ids, past_key_values=None): # Token embeddings hidden_states = self.embed_tokens(input_ids) - - # Generate position ids - seq_length = input_ids.shape[1] - position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device) - position_ids = position_ids.unsqueeze(0).expand(input_ids.shape) - # Apply each layer in this shard new_past_key_values = [] for i, layer in enumerate(self.layers): layer_outputs = layer( hidden_states, - attention_mask=None, - position_ids=position_ids, past_key_value=past_key_values[i], use_cache=True, output_attentions=False, @@ -82,12 +82,4 @@ def forward_layers(self, input_ids, past_key_values=None): hidden_states = layer_outputs[0] new_past_key_values.append(layer_outputs[1]) - if self.shard.is_last_layer(): - hidden_states = self.norm(hidden_states) - logits = self.lm_head(hidden_states) - return logits, new_past_key_values - else: - return hidden_states, new_past_key_values - - def is_last_layer(self): - return 
self.shard.is_last_layer() \ No newline at end of file + return hidden_states, new_past_key_values \ No newline at end of file From 90eb294b4a611b3f0dff39f6a1c5d3d5aa12b808 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 20:53:52 -0800 Subject: [PATCH 087/589] updating hf --- exo/inference/pytorch/model/hf.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index d87235fb7..f7e946a2f 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -9,7 +9,7 @@ def __init__(self, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() if DEBUG >= 2: - print(f"ShardedHuggingFaceModel init with shard {shard}") + print(f"\nShardedHuggingFaceModel init with shard {shard}") self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard @@ -40,21 +40,22 @@ def prefill(self, tokens: torch.tensor, start_pos: int=0) -> int: # Convert token to a tensor and get embeddings token_tensor = torch.tensor([[token]], device=self.device) inputs_embeds = self.embed_tokens(token_tensor) - if DEBUG >= 2: - print(f"Initial input embeddings shape: {inputs_embeds.shape}") + print(f"\nInitial input embeddings shape: {inputs_embeds.shape}") # Debugging # Prefill with tokens + position_ids = torch.arange(start_pos, start_pos + 1, dtype=torch.long, device=self.device).unsqueeze(0) for layer in self.layers: layer_outputs = layer( inputs_embeds, + position_ids=position_ids, use_cache=True, output_attentions=False, ) inputs_embeds = layer_outputs[0] if DEBUG >= 2: - print(f"Layer output shape: {inputs_embeds.shape}") + print(f"\nLayer output shape: {inputs_embeds.shape}") # Debugging # Increment start position start_pos += 1 @@ -82,4 +83,17 @@ def forward_layers(self, input_ids, past_key_values=None): hidden_states = layer_outputs[0] new_past_key_values.append(layer_outputs[1]) - return hidden_states, new_past_key_values \ No newline at end of file + return hidden_states, new_past_key_values + + def forward(self, input_ids, past_key_values=None): + """ + Forward pass through the model. 
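# Editor's sketch of what the last shard does with its hidden states: apply the
# final norm and lm_head, then pick a next token (greedy here; the engine's
# TEMPERATURE / TOP_K settings are ignored for brevity). The LayerNorm / Linear
# stand-ins below exist only so the example runs; the real modules come from the
# full model as shown in the surrounding diff.
import torch

def next_token_from_hidden(hidden_states, norm, lm_head):
    logits = lm_head(norm(hidden_states))          # [batch, seq, vocab]
    return torch.argmax(logits[:, -1, :], dim=-1)  # [batch]

norm = torch.nn.LayerNorm(16)
lm_head = torch.nn.Linear(16, 100, bias=False)
assert next_token_from_hidden(torch.randn(1, 1, 16), norm, lm_head).shape == (1,)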
+ """ + hidden_states = self.prefill(input_ids) + hidden_states = self.norm(hidden_states) + logits = self.lm_head(hidden_states) + + if DEBUG >= 2: + print(f"\nLogits shape: {logits.shape}") # Debugging + return logits + \ No newline at end of file From 49cf8e3e5ad1d9c455047db39db991940b791efc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 20:58:28 -0800 Subject: [PATCH 088/589] updating hf --- exo/inference/pytorch/inference.py | 2 +- exo/inference/pytorch/model/hf.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index f8468ac87..9d4feb75d 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -72,7 +72,7 @@ async def infer_prompt( start_pos = self.model.prefill( torch.tensor(toks[:-1], device=self.device), start_pos=start_pos) - output_data, past_key_values = self.model.forward_layers(toks[:, -1:], past_key_values=past_key_values) + output_data, past_key_values = self.model(toks[:, -1:], past_key_values=past_key_values) output_data = output_data.detach().cpu().numpy() is_finished = output_data.shape[1] == 1 and output_data[0, 0, -1] == self.tokenizer.eos_token_id diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index f7e946a2f..71df1f5db 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -89,11 +89,10 @@ def forward(self, input_ids, past_key_values=None): """ Forward pass through the model. """ - hidden_states = self.prefill(input_ids) + hidden_states, new_past_key_values = self.forward_layers(input_ids, past_key_values) hidden_states = self.norm(hidden_states) logits = self.lm_head(hidden_states) if DEBUG >= 2: print(f"\nLogits shape: {logits.shape}") # Debugging - return logits - \ No newline at end of file + return logits, new_past_key_values From 9e7948f1a0819f1279e31236dfcb59340b26b358 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 21:07:51 -0800 Subject: [PATCH 089/589] working on none issue --- exo/inference/pytorch/inference.py | 99 +++++++++++------------------- 1 file changed, 37 insertions(+), 62 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 9d4feb75d..59c6b3cf3 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -32,12 +32,13 @@ def __init__(self, debug: bool = False): self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") async def infer_prompt( - self, - request_id: str, - shard: Optional[Shard] = None, - prompt: str = "", - image_str: Optional[str] = None, - inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + self, + request_id: str, + shard: Optional[Shard] = None, + prompt: str = "", + image_str: Optional[str] = None, + inference_state: Optional[str] = None + ) -> Tuple[np.ndarray, str, bool]: """ Perform inference based on a text prompt. @@ -51,86 +52,60 @@ async def infer_prompt( Returns: Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. 
""" - async def infer_prompt( - self, - request_id: str, - shard: Optional[Shard] = None, - prompt: str = "", - image_str: Optional[str] = None, - inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: - - if DEBUG >= 2: - print(f"[{request_id}] Processing prompt: {prompt[:50]}...") + # Ensure the shard is loaded await self.ensure_shard(shard) - toks = self.tokenizer.encode(prompt) - state = json.loads(inference_state) if inference_state else {} - start_pos = state.get("start_pos", 0) - past_key_values = self._load_kv_cache(state.get("past_key_values")) + # Tokenize the prompt + toks = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) + + # Load the past key values from the inference state if available + past_key_values = self._load_kv_cache(inference_state) + + # Prefill the model with tokens + start_pos = self.model.prefill(toks.squeeze()) - start_pos = self.model.prefill( - torch.tensor(toks[:-1], device=self.device), start_pos=start_pos) - - output_data, past_key_values = self.model(toks[:, -1:], past_key_values=past_key_values) - output_data = output_data.detach().cpu().numpy() + # Run the forward pass through the model layers + output_data, past_key_values = self.model.forward_layers(toks[:, -1:], past_key_values=past_key_values) - is_finished = output_data.shape[1] == 1 and output_data[0, 0, -1] == self.tokenizer.eos_token_id - new_state = { - "start_pos": start_pos, - "past_key_values": self._save_kv_cache(past_key_values) - } - new_inference_state = json.dumps(new_state) + # Save the past key values to the inference state + new_inference_state = self._save_kv_cache(past_key_values) + + is_finished = False # Assuming a mechanism to determine if the sequence is finished if DEBUG >= 2: - print(f"[{request_id}] Output size: {output_data.size}, Is finished: {is_finished}") + print(f"Output data: {output_data}, new inference state: {new_inference_state}, finished: {is_finished}") return output_data, new_inference_state, is_finished - async def infer_tensor( - self, - request_id: str, - shard: Optional[Shard] = None, - input_data: np.ndarray = None, - inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + self, + input_tensor: torch.Tensor, + shard: Optional[Shard] = None, + past_key_values: Optional[list] = None + ) -> Tuple[torch.Tensor, list]: """ - Perform inference based on an input tensor. + Perform inference based on a tensor input. Args: - request_id (str): Unique identifier for the request. + input_tensor (torch.Tensor): The input tensor for inference. shard (Optional[Shard]): Shard information for the model. - input_data (np.ndarray): The input tensor for inference. - inference_state (Optional[str]): The previous inference state. + past_key_values (Optional[list]): The previous inference state. Returns: - Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. + Tuple[torch.Tensor, list]: The output tensor and new inference state. 
""" - await self.ensure_shard(shard) - - if DEBUG >= 2: - print(f"[{request_id}] Processing tensor input, shape: {input_data.shape}") - - input_tensor = torch.tensor(input_data).unsqueeze(0).to(self.device) - state = json.loads(inference_state) if inference_state else {} - start_pos = state.get("start_pos", 0) - past_key_values = self._load_kv_cache(state.get("past_key_values")) + # Ensure the shard is loaded + await self.ensure_shard(shard) + # Run the forward pass through the model layers output_data, past_key_values = self.model.forward_layers(input_tensor, past_key_values=past_key_values) - output_data = output_data.detach().cpu().numpy() - - is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] - new_state = { - "start_pos": start_pos + 1, - "past_key_values": self._save_kv_cache(past_key_values) - } - new_inference_state = json.dumps(new_state) if DEBUG >= 2: - print(f"[{request_id}] Output size: {output_data.size}, Is finished: {is_finished}") + print(f"Output data shape: {output_data.shape}") - return output_data, new_inference_state, is_finished + return output_data, past_key_values def _load_kv_cache(self, past_key_values_list): """ From b0320e4fa414b9a6218fbc824260bacbfd233e5d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 21:10:16 -0800 Subject: [PATCH 090/589] working on forward_layers --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 71df1f5db..2a5a953a8 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -76,7 +76,7 @@ def forward_layers(self, input_ids, past_key_values=None): for i, layer in enumerate(self.layers): layer_outputs = layer( hidden_states, - past_key_value=past_key_values[i], + past_key_value=past_key_values[i] if len(past_key_values) > 0 else None, use_cache=True, output_attentions=False, ) From 6464b40f1e6d01c9df5033cc8d6e45982196e81c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 21:29:07 -0800 Subject: [PATCH 091/589] working on forward_layers bug --- exo/inference/pytorch/inference.py | 30 ++++++++++++++++++++++-------- exo/inference/pytorch/model/hf.py | 16 +++++++++++++--- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 59c6b3cf3..25456d38a 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -71,19 +71,23 @@ async def infer_prompt( # Save the past key values to the inference state new_inference_state = self._save_kv_cache(past_key_values) - is_finished = False # Assuming a mechanism to determine if the sequence is finished + is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 2: print(f"Output data: {output_data}, new inference state: {new_inference_state}, finished: {is_finished}") - return output_data, new_inference_state, is_finished + return ( + output_data, + json.dumps({"start_pos": start_pos}), + is_finished + ) async def infer_tensor( self, - input_tensor: torch.Tensor, - shard: Optional[Shard] = None, - past_key_values: Optional[list] = None - ) -> Tuple[torch.Tensor, list]: + request_id: str, + shard: Shard, + input_data: np.ndarray, + inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: """ Perform inference based on a tensor input. 
@@ -98,14 +102,24 @@ async def infer_tensor( # Ensure the shard is loaded await self.ensure_shard(shard) + start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 # Run the forward pass through the model layers - output_data, past_key_values = self.model.forward_layers(input_tensor, past_key_values=past_key_values) + output_data, past_key_values = self.model.forward_layers( + input_data, + past_key_values=past_key_values + ) + + is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 2: print(f"Output data shape: {output_data.shape}") - return output_data, past_key_values + return ( + output_data, + json.dumps({"start_pos": start_pos}), + is_finished + ) def _load_kv_cache(self, past_key_values_list): """ diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 2a5a953a8..10ddc7566 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -41,7 +41,7 @@ def prefill(self, tokens: torch.tensor, start_pos: int=0) -> int: token_tensor = torch.tensor([[token]], device=self.device) inputs_embeds = self.embed_tokens(token_tensor) if DEBUG >= 2: - print(f"\nInitial input embeddings shape: {inputs_embeds.shape}") # Debugging + print(f"\nprefill shape: {inputs_embeds.shape}") # Debugging # Prefill with tokens position_ids = torch.arange(start_pos, start_pos + 1, dtype=torch.long, device=self.device).unsqueeze(0) @@ -62,7 +62,7 @@ def prefill(self, tokens: torch.tensor, start_pos: int=0) -> int: return start_pos - def forward_layers(self, input_ids, past_key_values=None): + def forward_layers(self, start_pos, input_ids, past_key_values=None): """ Forward pass through the specified layers. """ @@ -72,19 +72,29 @@ def forward_layers(self, input_ids, past_key_values=None): # Token embeddings hidden_states = self.embed_tokens(input_ids) + # Initialize position_ids + position_ids = torch.arange(start_pos, start_pos + input_ids.size(1), dtype=torch.long, device=input_ids.device).unsqueeze(0) + new_past_key_values = [] for i, layer in enumerate(self.layers): + # Get past key value if available + past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None + + # Forward pass through the layer layer_outputs = layer( hidden_states, - past_key_value=past_key_values[i] if len(past_key_values) > 0 else None, + position_ids=position_ids, + past_key_value=past_key_value, use_cache=True, output_attentions=False, ) + hidden_states = layer_outputs[0] new_past_key_values.append(layer_outputs[1]) return hidden_states, new_past_key_values + def forward(self, input_ids, past_key_values=None): """ Forward pass through the model. From b7fd0c197bea29efdac2a707f64c09ee91a7f255 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:00:26 -0800 Subject: [PATCH 092/589] cleaning up code, fixing passing of tensors --- exo/inference/pytorch/inference.py | 39 +++++++++--------------------- exo/inference/pytorch/model/hf.py | 36 ++++++++++++++++++--------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 25456d38a..2594c3ca1 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -39,19 +39,6 @@ async def infer_prompt( image_str: Optional[str] = None, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: - """ - Perform inference based on a text prompt. 
- - Args: - request_id (str): Unique identifier for the request. - shard (Optional[Shard]): Shard information for the model. - prompt (str): The input text prompt for inference. - image_str (Optional[str]): Optional image string for multi-modal models. - inference_state (Optional[str]): The previous inference state. - - Returns: - Tuple[np.ndarray, str, bool]: The output data, new inference state, and end-of-sequence flag. - """ # Ensure the shard is loaded await self.ensure_shard(shard) @@ -63,10 +50,18 @@ async def infer_prompt( past_key_values = self._load_kv_cache(inference_state) # Prefill the model with tokens - start_pos = self.model.prefill(toks.squeeze()) + start_pos = self.model.prefill(toks[:-1]) + last_token = toks[-1] # Run the forward pass through the model layers - output_data, past_key_values = self.model.forward_layers(toks[:, -1:], past_key_values=past_key_values) + output_data, past_key_values = self.model.forward_layers( + start_pos, + torch.tensor( + last_token, + device=self.device + ), + past_key_values=past_key_values + ) # Save the past key values to the inference state new_inference_state = self._save_kv_cache(past_key_values) @@ -88,17 +83,6 @@ async def infer_tensor( shard: Shard, input_data: np.ndarray, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: - """ - Perform inference based on a tensor input. - - Args: - input_tensor (torch.Tensor): The input tensor for inference. - shard (Optional[Shard]): Shard information for the model. - past_key_values (Optional[list]): The previous inference state. - - Returns: - Tuple[torch.Tensor, list]: The output tensor and new inference state. - """ # Ensure the shard is loaded await self.ensure_shard(shard) @@ -106,7 +90,8 @@ async def infer_tensor( # Run the forward pass through the model layers output_data, past_key_values = self.model.forward_layers( - input_data, + start_pos, + torch.tensor(input_data), past_key_values=past_key_values ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 10ddc7566..b0938a034 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -31,7 +31,7 @@ def __init__(self, shard: Shard): self.norm = self.full_model.model.norm self.lm_head = self.full_model.lm_head - def prefill(self, tokens: torch.tensor, start_pos: int=0) -> int: + def prefill(self, tokens: list[int], start_pos: int=0) -> int: """ Process the initial input tokens and set up the initial hidden states. """ @@ -39,7 +39,10 @@ def prefill(self, tokens: torch.tensor, start_pos: int=0) -> int: for token in tokens: # Convert token to a tensor and get embeddings token_tensor = torch.tensor([[token]], device=self.device) - inputs_embeds = self.embed_tokens(token_tensor) + + if self.shard.is_first_layer(): + token_tensor = self.embed_tokens(token_tensor) + if DEBUG >= 2: print(f"\nprefill shape: {inputs_embeds.shape}") # Debugging @@ -62,37 +65,48 @@ def prefill(self, tokens: torch.tensor, start_pos: int=0) -> int: return start_pos - def forward_layers(self, start_pos, input_ids, past_key_values=None): + def forward_layers( + self, + start_pos: int, + in_tensor: torch.tensor, + past_key_values=None + ) -> Tuple[any, list]: + """ Forward pass through the specified layers. 
""" if past_key_values is None: past_key_values = [None] * len(self.layers) - # Token embeddings - hidden_states = self.embed_tokens(input_ids) - # Initialize position_ids - position_ids = torch.arange(start_pos, start_pos + input_ids.size(1), dtype=torch.long, device=input_ids.device).unsqueeze(0) + position_ids = torch.arange( + start_pos, + start_pos + in_tensor.size(1), + dtype=torch.long, + device=in_tensor.device + ).unsqueeze(0) new_past_key_values = [] for i, layer in enumerate(self.layers): # Get past key value if available - past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None + if past_key_values and len(past_key_values) > 0: + past_key_value = past_key_values[i] + else: + past_key_value = None # Forward pass through the layer layer_outputs = layer( - hidden_states, + layer_out, position_ids=position_ids, past_key_value=past_key_value, use_cache=True, output_attentions=False, ) - hidden_states = layer_outputs[0] + layer_out = layer_outputs[0] new_past_key_values.append(layer_outputs[1]) - return hidden_states, new_past_key_values + return layer_out, new_past_key_values def forward(self, input_ids, past_key_values=None): From aba82da8d489d2be8bf3324ddc496defca241c5d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:09:01 -0800 Subject: [PATCH 093/589] cleaning up code, fixing passing of tensors --- exo/inference/pytorch/model/hf.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index b0938a034..819852e22 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -44,21 +44,10 @@ def prefill(self, tokens: list[int], start_pos: int=0) -> int: token_tensor = self.embed_tokens(token_tensor) if DEBUG >= 2: - print(f"\nprefill shape: {inputs_embeds.shape}") # Debugging + print(f"\ntoken_tensor shape: {token_tensor.shape}") # Prefill with tokens - position_ids = torch.arange(start_pos, start_pos + 1, dtype=torch.long, device=self.device).unsqueeze(0) - for layer in self.layers: - layer_outputs = layer( - inputs_embeds, - position_ids=position_ids, - use_cache=True, - output_attentions=False, - ) - inputs_embeds = layer_outputs[0] - - if DEBUG >= 2: - print(f"\nLayer output shape: {inputs_embeds.shape}") # Debugging + self.forward_layers(start_pos, token_tensor, None) # Increment start position start_pos += 1 From 46ae2c07e1696bc4266f5454a5685bb5d31ea65e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:16:42 -0800 Subject: [PATCH 094/589] cleaning up code, fixing passing of tensors --- exo/inference/pytorch/model/hf.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 819852e22..bb48b054f 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -69,13 +69,15 @@ def forward_layers( # Initialize position_ids position_ids = torch.arange( - start_pos, - start_pos + in_tensor.size(1), - dtype=torch.long, - device=in_tensor.device - ).unsqueeze(0) + start_pos, + start_pos + len(past_key_values), + dtype=torch.long, + device=in_tensor + ) + position_ids = position_ids.unsqueeze(0) new_past_key_values = [] + out_tensor = None for i, layer in enumerate(self.layers): # Get past key value if available if past_key_values and len(past_key_values) > 0: @@ -85,17 +87,17 @@ def forward_layers( # Forward pass through the layer layer_outputs = layer( 
- layer_out, + in_tensor if not out_tensor else out_tensor, position_ids=position_ids, past_key_value=past_key_value, use_cache=True, output_attentions=False, ) - layer_out = layer_outputs[0] + out_tensor = layer_outputs[0] new_past_key_values.append(layer_outputs[1]) - return layer_out, new_past_key_values + return out_tensor, new_past_key_values def forward(self, input_ids, past_key_values=None): From 5c5b6a40533828b5a06238f4b2121cd3012f7c4c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:23:04 -0800 Subject: [PATCH 095/589] cleaning up code, fixing passing of tensors --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index bb48b054f..e3c6ef959 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -72,7 +72,7 @@ def forward_layers( start_pos, start_pos + len(past_key_values), dtype=torch.long, - device=in_tensor + device=self.device ) position_ids = position_ids.unsqueeze(0) From 7ec1aefa8e99c6114123f519345541a644119194 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:32:42 -0800 Subject: [PATCH 096/589] cleaning up code, fixing passing of tensors --- exo/inference/pytorch/inference.py | 10 +++++----- exo/inference/pytorch/model/hf.py | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 2594c3ca1..2d10397f2 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -51,15 +51,15 @@ async def infer_prompt( # Prefill the model with tokens start_pos = self.model.prefill(toks[:-1]) - last_token = toks[-1] + last_token = torch.tensor( + toks[-1], + device=self.device + ) # Run the forward pass through the model layers output_data, past_key_values = self.model.forward_layers( start_pos, - torch.tensor( - last_token, - device=self.device - ), + last_token, past_key_values=past_key_values ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index e3c6ef959..055dd2c87 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -22,6 +22,7 @@ def __init__(self, shard: Shard): ) # Extract only the layers for this shard + print(f"\nself.model: {self.model}\n") self.layers = torch.nn.ModuleList([ self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) ]) From cdc7efba7eb8d15e5b995f532049ea62f451d70a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:33:52 -0800 Subject: [PATCH 097/589] cleaning up code, fixing passing of tensors --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 055dd2c87..6c6b8e621 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -22,7 +22,7 @@ def __init__(self, shard: Shard): ) # Extract only the layers for this shard - print(f"\nself.model: {self.model}\n") + print(f"\nself.model: {self.full_model.model}\n") self.layers = torch.nn.ModuleList([ self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) ]) From 6934d354769c8bfa5c2a66f3abc1af686471a179 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:41:10 -0800 Subject: [PATCH 098/589] cleaning up code, fixing passing of tensors --- exo/api/chatgpt_api.py | 2 +- 
exo/inference/pytorch/model/hf.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py index 9ed4a47a4..fb041b3ca 100644 --- a/exo/api/chatgpt_api.py +++ b/exo/api/chatgpt_api.py @@ -16,7 +16,7 @@ ### llama "llama-3.1-8b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), - "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, end_layer=0, n_layers=12), + "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, end_layer=0, n_layers=32), }, "llama-3.1-70b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 6c6b8e621..8d2c2aad8 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -40,9 +40,7 @@ def prefill(self, tokens: list[int], start_pos: int=0) -> int: for token in tokens: # Convert token to a tensor and get embeddings token_tensor = torch.tensor([[token]], device=self.device) - - if self.shard.is_first_layer(): - token_tensor = self.embed_tokens(token_tensor) + token_tensor = self.embed_tokens(token_tensor) if DEBUG >= 2: print(f"\ntoken_tensor shape: {token_tensor.shape}") @@ -65,6 +63,10 @@ def forward_layers( """ Forward pass through the specified layers. """ + # embed in_tensor + in_tensor = self.embed_tokens(in_tensor) + + # check past key values if past_key_values is None: past_key_values = [None] * len(self.layers) From 5f87cb666f84fc448d92a9420b19ac3827591956 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:43:06 -0800 Subject: [PATCH 099/589] cleaning up code, fixing passing of tensors --- exo/inference/pytorch/model/hf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 8d2c2aad8..6c609cbc3 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -33,6 +33,7 @@ def __init__(self, shard: Shard): self.lm_head = self.full_model.lm_head def prefill(self, tokens: list[int], start_pos: int=0) -> int: + print(f"\nprefill called") """ Process the initial input tokens and set up the initial hidden states. """ @@ -64,7 +65,7 @@ def forward_layers( Forward pass through the specified layers. """ # embed in_tensor - in_tensor = self.embed_tokens(in_tensor) + # in_tensor = self.embed_tokens(in_tensor) # check past key values if past_key_values is None: From 8eae07d9d56747bd8c59a05dba854ea07d05a967 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:46:00 -0800 Subject: [PATCH 100/589] cleaning up code, fixing passing of tensors --- exo/inference/pytorch/model/hf.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 6c609cbc3..2ac676532 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -64,8 +64,11 @@ def forward_layers( """ Forward pass through the specified layers. 
""" - # embed in_tensor - # in_tensor = self.embed_tokens(in_tensor) + # embed tensor if first layer + if self.shard.is_first_layer(): + if DEBUG >= 2: + print(f"Embedding first layer in_tensor {in_tensor.shape()}") + in_tensor = self.embed_tokens(in_tensor) # check past key values if past_key_values is None: From f8f8e54de099f82d57244217b7abe665725ab9ab Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:46:58 -0800 Subject: [PATCH 101/589] cleaning up code, fixing passing of tensors --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 2ac676532..2c902e549 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -67,7 +67,7 @@ def forward_layers( # embed tensor if first layer if self.shard.is_first_layer(): if DEBUG >= 2: - print(f"Embedding first layer in_tensor {in_tensor.shape()}") + print(f"Embedding first layer in_tensor {in_tensor.shape}") in_tensor = self.embed_tokens(in_tensor) # check past key values From 189760a764efdf82e37e256efbba64c65e5cec8e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:49:44 -0800 Subject: [PATCH 102/589] cleaning up code, fixing passing of tensors --- exo/inference/pytorch/model/hf.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 2c902e549..e3f00611b 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -93,6 +93,9 @@ def forward_layers( past_key_value = None # Forward pass through the layer + if DEBUG >= 2: + print(f"pass tensor to layer[{i}] {layer}") + layer_outputs = layer( in_tensor if not out_tensor else out_tensor, position_ids=position_ids, @@ -105,16 +108,3 @@ def forward_layers( new_past_key_values.append(layer_outputs[1]) return out_tensor, new_past_key_values - - - def forward(self, input_ids, past_key_values=None): - """ - Forward pass through the model. 
- """ - hidden_states, new_past_key_values = self.forward_layers(input_ids, past_key_values) - hidden_states = self.norm(hidden_states) - logits = self.lm_head(hidden_states) - - if DEBUG >= 2: - print(f"\nLogits shape: {logits.shape}") # Debugging - return logits, new_past_key_values From 704da617e7f87e19c8b0dcb763cc2d7d5c2246e0 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 22:53:50 -0800 Subject: [PATCH 103/589] messing with layers --- exo/inference/pytorch/model/hf.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index e3f00611b..3508aed65 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -23,9 +23,14 @@ def __init__(self, shard: Shard): # Extract only the layers for this shard print(f"\nself.model: {self.full_model.model}\n") - self.layers = torch.nn.ModuleList([ - self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) - ]) + self.layers = [] + for i in range(shard.start_layer, shard.end_layer + 1): + if DEBUG >= 2: + print(f"layer[{i}]: {self.full_model.model.layers[i]}") + + self.layers.append(self.full_model.model.layers[i]) + + # self.layers = torch.nn.ModuleList(layer_list) # Embeddings and final layer norm self.embed_tokens = self.full_model.model.embed_tokens @@ -95,7 +100,7 @@ def forward_layers( # Forward pass through the layer if DEBUG >= 2: print(f"pass tensor to layer[{i}] {layer}") - + layer_outputs = layer( in_tensor if not out_tensor else out_tensor, position_ids=position_ids, From 9bad63f8ec92b5bd8b601d55c8b7105c8795cc58 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 23:02:01 -0800 Subject: [PATCH 104/589] messing with layers --- exo/inference/pytorch/model/hf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 3508aed65..006447a5a 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -23,6 +23,7 @@ def __init__(self, shard: Shard): # Extract only the layers for this shard print(f"\nself.model: {self.full_model.model}\n") + print(f"\nlayer amount: {len(self.full_model.model.layers)}") self.layers = [] for i in range(shard.start_layer, shard.end_layer + 1): if DEBUG >= 2: From 2ecb3b749953d87ca6b9557c2f40d760b0c2273d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 23:23:23 -0800 Subject: [PATCH 105/589] messing with layers --- exo/inference/pytorch/inference.py | 24 +++------- exo/inference/pytorch/model/hf.py | 77 ++++++++++++++---------------- 2 files changed, 42 insertions(+), 59 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 2d10397f2..d20755a1b 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -44,38 +44,26 @@ async def infer_prompt( await self.ensure_shard(shard) # Tokenize the prompt - toks = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) + toks = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) # Load the past key values from the inference state if available past_key_values = self._load_kv_cache(inference_state) - # Prefill the model with tokens - start_pos = self.model.prefill(toks[:-1]) - last_token = torch.tensor( - toks[-1], - device=self.device - ) - # Run the forward pass through the model layers output_data, past_key_values = self.model.forward_layers( - start_pos, - last_token, 
+ input_ids=toks, past_key_values=past_key_values ) # Save the past key values to the inference state - new_inference_state = self._save_kv_cache(past_key_values) + self._save_kv_cache(past_key_values) - is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] + is_finished = False # Assuming a mechanism to determine if the sequence is finished if DEBUG >= 2: - print(f"Output data: {output_data}, new inference state: {new_inference_state}, finished: {is_finished}") + print(f"Output data: {output_data}, new inference state: {past_key_values}, finished: {is_finished}") - return ( - output_data, - json.dumps({"start_pos": start_pos}), - is_finished - ) + return output_data, "", is_finished async def infer_tensor( self, diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 006447a5a..bca8ce6f1 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -38,79 +38,74 @@ def __init__(self, shard: Shard): self.norm = self.full_model.model.norm self.lm_head = self.full_model.lm_head - def prefill(self, tokens: list[int], start_pos: int=0) -> int: - print(f"\nprefill called") - """ - Process the initial input tokens and set up the initial hidden states. - """ - # Assuming tokens is a 1D tensor of token IDs - for token in tokens: - # Convert token to a tensor and get embeddings - token_tensor = torch.tensor([[token]], device=self.device) - token_tensor = self.embed_tokens(token_tensor) + # def prefill(self, tokens: list[int], start_pos: int=0) -> int: + # print(f"\nprefill called") + # """ + # Process the initial input tokens and set up the initial hidden states. + # """ + # # Assuming tokens is a 1D tensor of token IDs + # for token in tokens: + # # Convert token to a tensor and get embeddings + # token_tensor = torch.tensor([[token]], device=self.device) + # token_tensor = self.embed_tokens(token_tensor) - if DEBUG >= 2: - print(f"\ntoken_tensor shape: {token_tensor.shape}") + # if DEBUG >= 2: + # print(f"\ntoken_tensor shape: {token_tensor.shape}") - # Prefill with tokens - self.forward_layers(start_pos, token_tensor, None) + # # Prefill with tokens + # self.forward_layers(start_pos, token_tensor, None) - # Increment start position - start_pos += 1 + # # Increment start position + # start_pos += 1 - return start_pos + # return start_pos def forward_layers( - self, - start_pos: int, - in_tensor: torch.tensor, - past_key_values=None - ) -> Tuple[any, list]: - + self, + input_ids: torch.tensor, + past_key_values=None + ) -> Tuple[any, list]: """ Forward pass through the specified layers. 
""" - # embed tensor if first layer + # Embed tensor if first layer if self.shard.is_first_layer(): if DEBUG >= 2: - print(f"Embedding first layer in_tensor {in_tensor.shape}") - in_tensor = self.embed_tokens(in_tensor) + print(f"Embedding first layer input_ids {input_ids.shape}") + hidden_states = self.embed_tokens(input_ids) + else: + hidden_states = input_ids - # check past key values + # Check past key values if past_key_values is None: past_key_values = [None] * len(self.layers) # Initialize position_ids position_ids = torch.arange( - start_pos, - start_pos + len(past_key_values), - dtype=torch.long, + hidden_states.size(1), + dtype=torch.long, device=self.device - ) - position_ids = position_ids.unsqueeze(0) + ).unsqueeze(0) new_past_key_values = [] - out_tensor = None for i, layer in enumerate(self.layers): # Get past key value if available - if past_key_values and len(past_key_values) > 0: - past_key_value = past_key_values[i] - else: - past_key_value = None + past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None # Forward pass through the layer if DEBUG >= 2: - print(f"pass tensor to layer[{i}] {layer}") + print(f"Pass tensor to layer[{i}] {layer}") layer_outputs = layer( - in_tensor if not out_tensor else out_tensor, + hidden_states, position_ids=position_ids, past_key_value=past_key_value, use_cache=True, output_attentions=False, ) - out_tensor = layer_outputs[0] + hidden_states = layer_outputs[0] new_past_key_values.append(layer_outputs[1]) - return out_tensor, new_past_key_values + return hidden_states, new_past_key_values + From d22439e988e758e17a8dc8af234f991d816bde0c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 23:27:44 -0800 Subject: [PATCH 106/589] fixing kv save --- exo/inference/pytorch/inference.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index d20755a1b..862da6ee0 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -120,7 +120,13 @@ def _save_kv_cache(self, past_key_values): """ if past_key_values is None: return [] - return [kv.cpu().tolist() for kv in past_key_values] + + new_cache = [] + for kv in past_key_values: + if kv: + new_cache.append(kv.cpu().tolist()) + + return new_cache async def ensure_shard(self, shard: Optional[Shard]): """ From 7c54803e9fd9f4938909a7c45ed96b9db78e5c73 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 23:45:13 -0800 Subject: [PATCH 107/589] fixing last layer issue --- exo/inference/pytorch/model/hf.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index bca8ce6f1..d13c57839 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -107,5 +107,10 @@ def forward_layers( hidden_states = layer_outputs[0] new_past_key_values.append(layer_outputs[1]) - return hidden_states, new_past_key_values + if self.shard.is_last_layer(): + hidden_states = self.norm(hidden_states) + logits = self.lm_head(hidden_states).float()[:, -1, :] + return logits + else: + return hidden_states, new_past_key_values From a2478897b3182efdbc0ace84d053526b848d97f1 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 7 Aug 2024 23:46:40 -0800 Subject: [PATCH 108/589] fixing last layer issue --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py 
b/exo/inference/pytorch/model/hf.py index d13c57839..b4a4c90a8 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -110,7 +110,7 @@ def forward_layers( if self.shard.is_last_layer(): hidden_states = self.norm(hidden_states) logits = self.lm_head(hidden_states).float()[:, -1, :] - return logits + return logits, new_past_key_values else: return hidden_states, new_past_key_values From d1ea73a95e8e40de055da6b78a6077b879b9b171 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 00:01:18 -0800 Subject: [PATCH 109/589] fixing last layer issue --- exo/inference/pytorch/model/hf.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index b4a4c90a8..acff97561 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -94,7 +94,7 @@ def forward_layers( # Forward pass through the layer if DEBUG >= 2: - print(f"Pass tensor to layer[{i}] {layer}") + print(f"\nPass tensor to layer[{i}] {layer}") layer_outputs = layer( hidden_states, @@ -103,6 +103,9 @@ def forward_layers( use_cache=True, output_attentions=False, ) + + if DEBUG >= 2: + print(f"\nlayer_outputs: {layer_outputs}") hidden_states = layer_outputs[0] new_past_key_values.append(layer_outputs[1]) From 3a36dfb8d490345bd665d1a7e6257530de57c38b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 00:07:15 -0800 Subject: [PATCH 110/589] fixing last layer issue --- exo/inference/pytorch/model/hf.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index acff97561..73d8aab89 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -99,9 +99,8 @@ def forward_layers( layer_outputs = layer( hidden_states, position_ids=position_ids, - past_key_value=past_key_value, - use_cache=True, - output_attentions=False, + # past_key_value=past_key_value, + use_cache=True ) if DEBUG >= 2: From 9596569a55ec793f388f9e72406d78374d67473b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 00:13:39 -0800 Subject: [PATCH 111/589] fixing last layer issue --- exo/inference/pytorch/model/hf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 73d8aab89..63d7b5f9a 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -110,8 +110,7 @@ def forward_layers( new_past_key_values.append(layer_outputs[1]) if self.shard.is_last_layer(): - hidden_states = self.norm(hidden_states) - logits = self.lm_head(hidden_states).float()[:, -1, :] + _, logits, _, _, = self.full_model(hidden_states, position_ids=position_ids) return logits, new_past_key_values else: return hidden_states, new_past_key_values From 60aa203d9b8a49c7ccf376c46f21245525d483c2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 20:27:47 -0800 Subject: [PATCH 112/589] removed new past key values from llama hf model, returning None from model LlamaDecoderLayer, updating infer output to numpy array --- exo/inference/pytorch/inference.py | 28 ++++++++++++++++------------ exo/inference/pytorch/model/hf.py | 22 ++++++++++++---------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 862da6ee0..15c7c82fb 100644 --- a/exo/inference/pytorch/inference.py +++ 
b/exo/inference/pytorch/inference.py @@ -1,6 +1,6 @@ # experimental, based off of tinygrad/inference.py -import json +import numpy as np import torch import numpy as np from typing import Optional, Callable, Tuple @@ -50,20 +50,25 @@ async def infer_prompt( past_key_values = self._load_kv_cache(inference_state) # Run the forward pass through the model layers - output_data, past_key_values = self.model.forward_layers( + # output_data, past_key_values + output_data = self.model.forward_layers( input_ids=toks, - past_key_values=past_key_values + # past_key_values=past_key_values ) # Save the past key values to the inference state self._save_kv_cache(past_key_values) - is_finished = False # Assuming a mechanism to determine if the sequence is finished + is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 2: print(f"Output data: {output_data}, new inference state: {past_key_values}, finished: {is_finished}") - return output_data, "", is_finished + return ( + np.array(output_data), + "", + is_finished + ) async def infer_tensor( self, @@ -74,13 +79,12 @@ async def infer_tensor( # Ensure the shard is loaded await self.ensure_shard(shard) - start_pos = json.loads(inference_state).get("start_pos", 0) if inference_state else 0 # Run the forward pass through the model layers - output_data, past_key_values = self.model.forward_layers( - start_pos, - torch.tensor(input_data), - past_key_values=past_key_values + # output_data, past_key_values + output_data = self.model.forward_layers( + input_ids=torch.tensor(input_data), + # past_key_values=past_key_values ) is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] @@ -89,8 +93,8 @@ async def infer_tensor( print(f"Output data shape: {output_data.shape}") return ( - output_data, - json.dumps({"start_pos": start_pos}), + np.array(output_data), + "", is_finished ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 63d7b5f9a..4309734c5 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -63,10 +63,12 @@ def __init__(self, shard: Shard): def forward_layers( self, input_ids: torch.tensor, - past_key_values=None - ) -> Tuple[any, list]: + #past_key_values: list + ) -> torch.tensor: #-> Tuple[torch.tensor, list]: """ Forward pass through the specified layers. 
+ + Note: past_key_values not working for model, might be a library bug """ # Embed tensor if first layer if self.shard.is_first_layer(): @@ -77,8 +79,8 @@ def forward_layers( hidden_states = input_ids # Check past key values - if past_key_values is None: - past_key_values = [None] * len(self.layers) + # if past_key_values is None: + # past_key_values = [None] * len(self.layers) # Initialize position_ids position_ids = torch.arange( @@ -87,10 +89,10 @@ def forward_layers( device=self.device ).unsqueeze(0) - new_past_key_values = [] + #new_past_key_values = [] for i, layer in enumerate(self.layers): # Get past key value if available - past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None + # past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None # Forward pass through the layer if DEBUG >= 2: @@ -100,18 +102,18 @@ def forward_layers( hidden_states, position_ids=position_ids, # past_key_value=past_key_value, - use_cache=True + # use_cache=True ) if DEBUG >= 2: print(f"\nlayer_outputs: {layer_outputs}") hidden_states = layer_outputs[0] - new_past_key_values.append(layer_outputs[1]) + # new_past_key_values.append(layer_outputs[1]) if self.shard.is_last_layer(): _, logits, _, _, = self.full_model(hidden_states, position_ids=position_ids) - return logits, new_past_key_values + return logits #, new_past_key_values else: - return hidden_states, new_past_key_values + return hidden_states#, new_past_key_values From 217ad64792ef2a01199a807208c6ac8626ab1c40 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 20:32:50 -0800 Subject: [PATCH 113/589] fixing logits --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 4309734c5..7e7c8671a 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -112,8 +112,8 @@ def forward_layers( # new_past_key_values.append(layer_outputs[1]) if self.shard.is_last_layer(): - _, logits, _, _, = self.full_model(hidden_states, position_ids=position_ids) - return logits #, new_past_key_values + logits = self.full_model.model.norm(hidden_states) + return logits.flatten() #, new_past_key_values else: return hidden_states#, new_past_key_values From 7974f23057230f23f2b1dd45d83b1148753c01b3 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 20:40:29 -0800 Subject: [PATCH 114/589] offloading tensor to numpy with cpu --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 15c7c82fb..c2e2faf9b 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -65,7 +65,7 @@ async def infer_prompt( print(f"Output data: {output_data}, new inference state: {past_key_values}, finished: {is_finished}") return ( - np.array(output_data), + output_data.cpu().numpy(), "", is_finished ) @@ -93,7 +93,7 @@ async def infer_tensor( print(f"Output data shape: {output_data.shape}") return ( - np.array(output_data), + output_data.cpu().numpy(), "", is_finished ) From 7ea9c5bc4e3420b059b3c757deef9937e1a1b8c2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 20:44:23 -0800 Subject: [PATCH 115/589] offloading tensor to numpy with cpu --- exo/inference/pytorch/inference.py | 56 +++++++++++++++--------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git 
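# A minimal standalone sketch of the tensor-to-NumPy hand-off these patches are iterating
# toward. detach() drops the autograd graph, cpu() moves the data off the accelerator, and
# numpy() then views the CPU tensor, so no extra torch.no_grad() wrapper is needed for the
# conversion itself (it mainly matters around the forward pass). The helper name here is
# illustrative, not part of the engine.
import torch

def to_numpy(output_data: torch.Tensor):
    # Safe even when the tensor was produced with gradients enabled on a GPU device.
    return output_data.detach().cpu().numpy()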
a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index c2e2faf9b..40edb2760 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -65,7 +65,7 @@ async def infer_prompt( print(f"Output data: {output_data}, new inference state: {past_key_values}, finished: {is_finished}") return ( - output_data.cpu().numpy(), + output_data.detach().numpy(), "", is_finished ) @@ -93,44 +93,44 @@ async def infer_tensor( print(f"Output data shape: {output_data.shape}") return ( - output_data.cpu().numpy(), + output_data.detach().numpy(), "", is_finished ) - def _load_kv_cache(self, past_key_values_list): - """ - Load key-value cache from the inference state. + # def _load_kv_cache(self, past_key_values_list): + # """ + # Load key-value cache from the inference state. - Args: - past_key_values_list (list): List of past key-value tensors. + # Args: + # past_key_values_list (list): List of past key-value tensors. - Returns: - list: List of loaded past key-value tensors. - """ - if past_key_values_list is None: - return [] - return [torch.tensor(kv, device=self.device) for kv in past_key_values_list] + # Returns: + # list: List of loaded past key-value tensors. + # """ + # if past_key_values_list is None: + # return [] + # return [torch.tensor(kv, device=self.device) for kv in past_key_values_list] - def _save_kv_cache(self, past_key_values): - """ - Save key-value cache to the inference state. + # def _save_kv_cache(self, past_key_values): + # """ + # Save key-value cache to the inference state. - Args: - past_key_values (list): List of past key-value tensors. + # Args: + # past_key_values (list): List of past key-value tensors. - Returns: - list: List of key-value tensors in a format suitable for saving. - """ - if past_key_values is None: - return [] + # Returns: + # list: List of key-value tensors in a format suitable for saving. 
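# A rough sketch of the tensor <-> list round-trip these commented-out helpers describe,
# assuming past_key_values is a flat list of tensors. Hugging Face decoder layers actually
# return (key, value) pairs (or a Cache object), so a real version would have to walk that
# structure; this only illustrates the serialization idea, and the helper names are
# assumptions rather than engine API.
import torch

def save_kv_cache(past_key_values):
    # CPU + nested Python lists so the cache can travel inside a JSON inference_state.
    return [kv.cpu().tolist() for kv in past_key_values if kv is not None]

def load_kv_cache(saved, device):
    # Rebuild tensors on the target device; an empty or missing payload yields an empty cache.
    return [torch.tensor(kv, device=device) for kv in (saved or [])]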
+ # """ + # if past_key_values is None: + # return [] - new_cache = [] - for kv in past_key_values: - if kv: - new_cache.append(kv.cpu().tolist()) + # new_cache = [] + # for kv in past_key_values: + # if kv: + # new_cache.append(kv.cpu().tolist()) - return new_cache + # return new_cache async def ensure_shard(self, shard: Optional[Shard]): """ From 222612a9714e068924f0d32d87331cee853e7197 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 20:45:46 -0800 Subject: [PATCH 116/589] removing inference state and key value storage --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 40edb2760..0ca9f768d 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -47,7 +47,7 @@ async def infer_prompt( toks = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) # Load the past key values from the inference state if available - past_key_values = self._load_kv_cache(inference_state) + # past_key_values = self._load_kv_cache(inference_state) # Run the forward pass through the model layers # output_data, past_key_values @@ -57,7 +57,7 @@ async def infer_prompt( ) # Save the past key values to the inference state - self._save_kv_cache(past_key_values) + # self._save_kv_cache(past_key_values) is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] From 8478ca38ea25d26c6330177a4ccdcacb62ee22cb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 20:47:52 -0800 Subject: [PATCH 117/589] fixing debug error --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 0ca9f768d..a3e4e4a99 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -62,7 +62,7 @@ async def infer_prompt( is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 2: - print(f"Output data: {output_data}, new inference state: {past_key_values}, finished: {is_finished}") + print(f"Output data: {output_data} finished: {is_finished}") return ( output_data.detach().numpy(), From c12b64b2a9eca971c9ef57b22de12280157488e8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 20:49:30 -0800 Subject: [PATCH 118/589] offloading tensor bug --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index a3e4e4a99..501f3f3a0 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -65,7 +65,7 @@ async def infer_prompt( print(f"Output data: {output_data} finished: {is_finished}") return ( - output_data.detach().numpy(), + np.array(output_data.cpu()), "", is_finished ) @@ -93,7 +93,7 @@ async def infer_tensor( print(f"Output data shape: {output_data.shape}") return ( - output_data.detach().numpy(), + np.array(output_data.cpu()), "", is_finished ) From be8c7d41d7171b3a51fbed88a766e5fc8fe78aae Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 20:52:49 -0800 Subject: [PATCH 119/589] trying no_grad fix --- exo/inference/pytorch/inference.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 501f3f3a0..cf66242f2 100644 --- 
a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -64,9 +64,12 @@ async def infer_prompt( if DEBUG >= 2: print(f"Output data: {output_data} finished: {is_finished}") + with torch.no_grad(): + output_npa = np.array(output_data.cpu()) + return ( - np.array(output_data.cpu()), - "", + output_npa, + "", is_finished ) @@ -92,8 +95,12 @@ async def infer_tensor( if DEBUG >= 2: print(f"Output data shape: {output_data.shape}") + + with torch.no_grad(): + output_npa = np.array(output_data.cpu()) + return ( - np.array(output_data.cpu()), + output_npa, "", is_finished ) From 237ab341f72674fc1372617a5ce4d0bb3912cec5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 20:55:36 -0800 Subject: [PATCH 120/589] fixing embed error --- exo/inference/pytorch/model/hf.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 7e7c8671a..240a8480f 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -62,7 +62,7 @@ def __init__(self, shard: Shard): def forward_layers( self, - input_ids: torch.tensor, + hidden_states: torch.tensor, #past_key_values: list ) -> torch.tensor: #-> Tuple[torch.tensor, list]: """ @@ -71,12 +71,12 @@ def forward_layers( Note: past_key_values not working for model, might be a library bug """ # Embed tensor if first layer - if self.shard.is_first_layer(): - if DEBUG >= 2: - print(f"Embedding first layer input_ids {input_ids.shape}") - hidden_states = self.embed_tokens(input_ids) - else: - hidden_states = input_ids + # if self.shard.is_first_layer(): + # if DEBUG >= 2: + # print(f"Embedding first layer input_ids {input_ids.shape}") + # hidden_states = self.embed_tokens(input_ids) + # else: + # hidden_states = input_ids # Check past key values # if past_key_values is None: From 015bd4c8590bc71e9a34bf1c31434b3cc98a9836 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 20:59:07 -0800 Subject: [PATCH 121/589] removing embed --- exo/inference/pytorch/model/hf.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 240a8480f..adb7d4803 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -73,10 +73,15 @@ def forward_layers( # Embed tensor if first layer # if self.shard.is_first_layer(): # if DEBUG >= 2: - # print(f"Embedding first layer input_ids {input_ids.shape}") - # hidden_states = self.embed_tokens(input_ids) + # print(f"Embedding first layer input_ids {hidden_states.shape}") + + # # flatten to 1d and turn to long + # if hidden_states.dim() > 1: + # hidden_states = hidden_states.view(-1) + # hidden_states = hidden_states.long() + # hidden_states = self.embed_tokens(hidden_states) # else: - # hidden_states = input_ids + # hidden_states = hidden_states # Check past key values # if past_key_values is None: From ace849893385f60e595d569b262c506054fb774c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 21:06:15 -0800 Subject: [PATCH 122/589] cleaning up tokenizer --- exo/inference/pytorch/inference.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index cf66242f2..fbe3d9754 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -44,7 +44,7 @@ async def infer_prompt( await self.ensure_shard(shard) # Tokenize the prompt 
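# On the tokenizer calls below: invoking the tokenizer object with return_tensors="pt"
# yields a BatchEncoding whose .input_ids is already a tensor, whereas .encode(...) returns
# the tensor directly, so chaining .input_ids onto encode() raises AttributeError. A hedged
# illustration (any causal-LM tokenizer works; the gated Llama id is only the one this
# repo's shard table references):
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B")
ids_a = tokenizer("why is the sky blue?", return_tensors="pt").input_ids  # BatchEncoding -> tensor
ids_b = tokenizer.encode("why is the sky blue?", return_tensors="pt")     # already a tensor
assert ids_a.shape == ids_b.shape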
- toks = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) + toks = self.tokenizer.encode(prompt, return_tensors="pt").input_ids.to(self.device) # Load the past key values from the inference state if available # past_key_values = self._load_kv_cache(inference_state) @@ -52,7 +52,7 @@ async def infer_prompt( # Run the forward pass through the model layers # output_data, past_key_values output_data = self.model.forward_layers( - input_ids=toks, + torch.tensor(toks), # past_key_values=past_key_values ) @@ -86,7 +86,7 @@ async def infer_tensor( # Run the forward pass through the model layers # output_data, past_key_values output_data = self.model.forward_layers( - input_ids=torch.tensor(input_data), + torch.tensor(input_data), # past_key_values=past_key_values ) From 248c2168cd5f953ac1340c7d7537e33cd62e99af Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 21:07:40 -0800 Subject: [PATCH 123/589] cleaning up tokenizer --- exo/inference/pytorch/inference.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index fbe3d9754..05b40f7fc 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -44,7 +44,8 @@ async def infer_prompt( await self.ensure_shard(shard) # Tokenize the prompt - toks = self.tokenizer.encode(prompt, return_tensors="pt").input_ids.to(self.device) + toks = self.tokenizer.encode(prompt, return_tensors="pt") + #.input_ids.to(self.device) # Load the past key values from the inference state if available # past_key_values = self._load_kv_cache(inference_state) From c6ac9a3eca9c1082306f18b127b08c6da9bb81e9 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 21:11:53 -0800 Subject: [PATCH 124/589] cleaning up tokenizer --- exo/inference/pytorch/inference.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 05b40f7fc..a1937f833 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -44,8 +44,7 @@ async def infer_prompt( await self.ensure_shard(shard) # Tokenize the prompt - toks = self.tokenizer.encode(prompt, return_tensors="pt") - #.input_ids.to(self.device) + toks = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) # Load the past key values from the inference state if available # past_key_values = self._load_kv_cache(inference_state) From 24cd9f1f6ff46ef598d9022162d4860a5087f47d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 21:19:46 -0800 Subject: [PATCH 125/589] cleaning up tokenizer --- exo/inference/pytorch/inference.py | 4 ++++ exo/inference/pytorch/model/hf.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index a1937f833..8083520e5 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -45,12 +45,16 @@ async def infer_prompt( # Tokenize the prompt toks = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) + toks_tensor = torch.tensor(toks) # Load the past key values from the inference state if available # past_key_values = self._load_kv_cache(inference_state) # Run the forward pass through the model layers # output_data, past_key_values + if DEBUG >= 2: + print(f"toks: {toks}\ntoks_tensor: {toks_tensor}") + output_data = self.model.forward_layers( torch.tensor(toks), # 
past_key_values=past_key_values diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index adb7d4803..3b94cb29f 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -26,8 +26,8 @@ def __init__(self, shard: Shard): print(f"\nlayer amount: {len(self.full_model.model.layers)}") self.layers = [] for i in range(shard.start_layer, shard.end_layer + 1): - if DEBUG >= 2: - print(f"layer[{i}]: {self.full_model.model.layers[i]}") + # if DEBUG >= 2: + # print(f"loading layer[{i}]: {self.full_model.model.layers[i]}") self.layers.append(self.full_model.model.layers[i]) From a2b9d79d291e7ebbc85399babfa40c9562c33dc3 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 21:23:43 -0800 Subject: [PATCH 126/589] cleaning up tokenizer --- exo/inference/pytorch/inference.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 8083520e5..0219850c3 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -45,7 +45,7 @@ async def infer_prompt( # Tokenize the prompt toks = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) - toks_tensor = torch.tensor(toks) + toks_embed = self.model.embed_tokens(torch.tensor(toks)) # Load the past key values from the inference state if available # past_key_values = self._load_kv_cache(inference_state) @@ -53,10 +53,10 @@ async def infer_prompt( # Run the forward pass through the model layers # output_data, past_key_values if DEBUG >= 2: - print(f"toks: {toks}\ntoks_tensor: {toks_tensor}") + print(f"toks: {toks}\ntoks_embed: {toks_embed}") output_data = self.model.forward_layers( - torch.tensor(toks), + toks_embed, # past_key_values=past_key_values ) From 935b1559f1f43c55e79acb91f123d2f19855d8bc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 21:28:10 -0800 Subject: [PATCH 127/589] removing flattening and logit, just return hidden states --- exo/inference/pytorch/model/hf.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 3b94cb29f..7632d0925 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -116,9 +116,10 @@ def forward_layers( hidden_states = layer_outputs[0] # new_past_key_values.append(layer_outputs[1]) - if self.shard.is_last_layer(): - logits = self.full_model.model.norm(hidden_states) - return logits.flatten() #, new_past_key_values - else: - return hidden_states#, new_past_key_values + return hidden_states + # if self.shard.is_last_layer(): + # logits = self.full_model.model.norm(hidden_states) + # return logits.flatten() #, new_past_key_values + # else: + # return hidden_states#, new_past_key_values From 922fb40ffd72087313921f8c6ecc6c45f033920a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 21:34:53 -0800 Subject: [PATCH 128/589] fix for last layer and stating when finished --- exo/inference/pytorch/inference.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 0219850c3..164130fd4 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -55,10 +55,14 @@ async def infer_prompt( if DEBUG >= 2: print(f"toks: {toks}\ntoks_embed: {toks_embed}") - output_data = self.model.forward_layers( - toks_embed, - # 
past_key_values=past_key_values - ) + if shard.is_last_layer(): + output_data = self.full_model.model.norm(toks_embed) + output_data = output_data.flatten() + else: + output_data = self.model.forward_layers( + toks_embed, + # past_key_values=past_key_values + ) # Save the past key values to the inference state # self._save_kv_cache(past_key_values) @@ -89,10 +93,15 @@ async def infer_tensor( # Run the forward pass through the model layers # output_data, past_key_values - output_data = self.model.forward_layers( - torch.tensor(input_data), - # past_key_values=past_key_values - ) + in_tensor = torch.tensor(input_data) + if shard.is_last_layer(): + output_data = self.full_model.model.norm(in_tensor) + output_data = output_data.flatten() + else: + output_data = self.model.forward_layers( + in_tensor, + # past_key_values=past_key_values + ) is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] From 5564f00d7500f1a11250508b1022219a23986111 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 21:43:20 -0800 Subject: [PATCH 129/589] making layer adjustments --- exo/inference/pytorch/inference.py | 10 +++++----- exo/inference/pytorch/model/hf.py | 20 ++++++++------------ 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 164130fd4..46d037b7e 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -44,8 +44,8 @@ async def infer_prompt( await self.ensure_shard(shard) # Tokenize the prompt - toks = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) - toks_embed = self.model.embed_tokens(torch.tensor(toks)) + tokens = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) + tokens_tensor = torch.tensor(tokens) # Load the past key values from the inference state if available # past_key_values = self._load_kv_cache(inference_state) @@ -53,14 +53,14 @@ async def infer_prompt( # Run the forward pass through the model layers # output_data, past_key_values if DEBUG >= 2: - print(f"toks: {toks}\ntoks_embed: {toks_embed}") + print(f"tokens: {tokens}\ntokens_tensor: {tokens_tensor}") if shard.is_last_layer(): - output_data = self.full_model.model.norm(toks_embed) + output_data = self.full_model.model.norm(tokens_tensor) output_data = output_data.flatten() else: output_data = self.model.forward_layers( - toks_embed, + tokens_tensor, # past_key_values=past_key_values ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 7632d0925..dd12d0495 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -62,7 +62,7 @@ def __init__(self, shard: Shard): def forward_layers( self, - hidden_states: torch.tensor, + input_data: torch.tensor, #past_key_values: list ) -> torch.tensor: #-> Tuple[torch.tensor, list]: """ @@ -71,17 +71,12 @@ def forward_layers( Note: past_key_values not working for model, might be a library bug """ # Embed tensor if first layer - # if self.shard.is_first_layer(): - # if DEBUG >= 2: - # print(f"Embedding first layer input_ids {hidden_states.shape}") + if self.shard.is_first_layer(): + if DEBUG >= 2: + print(f"Embedding for first layer {input_data.shape}") - # # flatten to 1d and turn to long - # if hidden_states.dim() > 1: - # hidden_states = hidden_states.view(-1) - # hidden_states = hidden_states.long() - # hidden_states = self.embed_tokens(hidden_states) - # else: - # hidden_states = hidden_states + # flatten to 1d and 
turn to long + input_data = self.embed_tokens(input_data) # Check past key values # if past_key_values is None: @@ -89,12 +84,13 @@ def forward_layers( # Initialize position_ids position_ids = torch.arange( - hidden_states.size(1), + input_data.size(1), dtype=torch.long, device=self.device ).unsqueeze(0) #new_past_key_values = [] + hidden_states = input_data for i, layer in enumerate(self.layers): # Get past key value if available # past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None From d436641c2d3666eb370883f2a2732afbffb1af59 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 21:45:31 -0800 Subject: [PATCH 130/589] typo --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index dd12d0495..66f0a9c41 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -34,9 +34,9 @@ def __init__(self, shard: Shard): # self.layers = torch.nn.ModuleList(layer_list) # Embeddings and final layer norm + # used for doing what forward LlamaModel does in transformers self.embed_tokens = self.full_model.model.embed_tokens self.norm = self.full_model.model.norm - self.lm_head = self.full_model.lm_head # def prefill(self, tokens: list[int], start_pos: int=0) -> int: # print(f"\nprefill called") From 75e8e69c61f84b2d70f84f0c0460bddee35acb0e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 21:47:37 -0800 Subject: [PATCH 131/589] typo --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 46d037b7e..6aeedf80c 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -56,7 +56,7 @@ async def infer_prompt( print(f"tokens: {tokens}\ntokens_tensor: {tokens_tensor}") if shard.is_last_layer(): - output_data = self.full_model.model.norm(tokens_tensor) + output_data = self.model.norm(tokens_tensor) output_data = output_data.flatten() else: output_data = self.model.forward_layers( @@ -95,7 +95,7 @@ async def infer_tensor( # output_data, past_key_values in_tensor = torch.tensor(input_data) if shard.is_last_layer(): - output_data = self.full_model.model.norm(in_tensor) + output_data = self.model.norm(in_tensor) output_data = output_data.flatten() else: output_data = self.model.forward_layers( From 9e04930d3899f653b3d58fc2ff9f82804dfaeed0 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 21:59:33 -0800 Subject: [PATCH 132/589] debugging and embedding everything --- exo/inference/pytorch/inference.py | 7 +++---- exo/inference/pytorch/model/hf.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 6aeedf80c..0955ecbe4 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -45,7 +45,6 @@ async def infer_prompt( # Tokenize the prompt tokens = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) - tokens_tensor = torch.tensor(tokens) # Load the past key values from the inference state if available # past_key_values = self._load_kv_cache(inference_state) @@ -53,14 +52,14 @@ async def infer_prompt( # Run the forward pass through the model layers # output_data, past_key_values if DEBUG >= 2: - print(f"tokens: {tokens}\ntokens_tensor: {tokens_tensor}") + print(f"tokens: 
{tokens}\n") if shard.is_last_layer(): - output_data = self.model.norm(tokens_tensor) + output_data = self.model.norm(tokens) output_data = output_data.flatten() else: output_data = self.model.forward_layers( - tokens_tensor, + tokens, # past_key_values=past_key_values ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 66f0a9c41..bd3d2be7c 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -69,14 +69,14 @@ def forward_layers( Forward pass through the specified layers. Note: past_key_values not working for model, might be a library bug - """ - # Embed tensor if first layer - if self.shard.is_first_layer(): - if DEBUG >= 2: - print(f"Embedding for first layer {input_data.shape}") - - # flatten to 1d and turn to long - input_data = self.embed_tokens(input_data) + """ + if DEBUG >= 2: + print(f"forward_layer call\ninput_data: {input_data}") + + # flatten to 1d and turn to long + input_data = self.embed_tokens(input_data) + if DEBUG >= 2: + print(f"embedded input_data {input_data}") # Check past key values # if past_key_values is None: From d7bddc16bf77d4026a05d5e1f2602cbda80197fd Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 22:00:23 -0800 Subject: [PATCH 133/589] embed before norm --- exo/inference/pytorch/inference.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 0955ecbe4..73fc79338 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -55,7 +55,8 @@ async def infer_prompt( print(f"tokens: {tokens}\n") if shard.is_last_layer(): - output_data = self.model.norm(tokens) + tokens_embed = self.model.embed_tokens(tokens) + output_data = self.model.norm(tokens_embed) output_data = output_data.flatten() else: output_data = self.model.forward_layers( From f8df8d3702b540cd8af00bef32348d9f71776388 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 22:02:01 -0800 Subject: [PATCH 134/589] fixing last layer output --- exo/inference/pytorch/inference.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 73fc79338..37c9b8498 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -54,15 +54,16 @@ async def infer_prompt( if DEBUG >= 2: print(f"tokens: {tokens}\n") + + + output_data = self.model.forward_layers( + tokens, + # past_key_values=past_key_values + ) + if shard.is_last_layer(): - tokens_embed = self.model.embed_tokens(tokens) - output_data = self.model.norm(tokens_embed) + output_data = self.model.norm(output_data) output_data = output_data.flatten() - else: - output_data = self.model.forward_layers( - tokens, - # past_key_values=past_key_values - ) # Save the past key values to the inference state # self._save_kv_cache(past_key_values) From 9a511569b6bb1a83360c1c5968f12a29ea77ca53 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 22:06:37 -0800 Subject: [PATCH 135/589] checking layer outputs and inputs --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index bd3d2be7c..db61b7203 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -97,7 +97,7 @@ def forward_layers( # Forward pass through the layer if DEBUG >= 2: - print(f"\nPass 
tensor to layer[{i}] {layer}") + print(f"\nhidden_states {hidden_states}") layer_outputs = layer( hidden_states, @@ -107,7 +107,7 @@ def forward_layers( ) if DEBUG >= 2: - print(f"\nlayer_outputs: {layer_outputs}") + print(f"\nlayer_outputs[0]: {layer_outputs[0]}") hidden_states = layer_outputs[0] # new_past_key_values.append(layer_outputs[1]) From 4514427257ffb291ccfdb438eadb0df049c6171a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 22:08:16 -0800 Subject: [PATCH 136/589] reshaping for norm layer call at end --- exo/inference/pytorch/inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 37c9b8498..51e07460e 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -62,6 +62,7 @@ async def infer_prompt( ) if shard.is_last_layer(): + output_data = output_data.view(1, -1, 4096) output_data = self.model.norm(output_data) output_data = output_data.flatten() From 989f29e8ca9e4ee1bceaf37253821b8f5cf2f49e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 22:11:19 -0800 Subject: [PATCH 137/589] reshaping for norm layer call at end --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index db61b7203..45ee29c5c 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -97,7 +97,7 @@ def forward_layers( # Forward pass through the layer if DEBUG >= 2: - print(f"\nhidden_states {hidden_states}") + print(f"\n[layer {i}] hidden_states {hidden_states}") layer_outputs = layer( hidden_states, @@ -107,7 +107,7 @@ def forward_layers( ) if DEBUG >= 2: - print(f"\nlayer_outputs[0]: {layer_outputs[0]}") + print(f"\n[layer {i}] layer_outputs: {layer_outputs[0]}") hidden_states = layer_outputs[0] # new_past_key_values.append(layer_outputs[1]) From 2fe90e87401b90263c55dcc278729716da1b99b9 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 22:15:50 -0800 Subject: [PATCH 138/589] remove flatten --- exo/inference/pytorch/inference.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 51e07460e..51132946b 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -62,9 +62,9 @@ async def infer_prompt( ) if shard.is_last_layer(): - output_data = output_data.view(1, -1, 4096) + # output_data = output_data.view(1, -1, 4096) output_data = self.model.norm(output_data) - output_data = output_data.flatten() + # output_data = output_data.flatten() # Save the past key values to the inference state # self._save_kv_cache(past_key_values) @@ -98,7 +98,7 @@ async def infer_tensor( in_tensor = torch.tensor(input_data) if shard.is_last_layer(): output_data = self.model.norm(in_tensor) - output_data = output_data.flatten() + # output_data = output_data.flatten() else: output_data = self.model.forward_layers( in_tensor, From bc235f749133680e4b993da1b1d2d2a7ce617862 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 22:21:32 -0800 Subject: [PATCH 139/589] encode tensor to set eos_token_id --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 51132946b..ebf7dafd9 100644 --- a/exo/inference/pytorch/inference.py +++ 
b/exo/inference/pytorch/inference.py @@ -44,7 +44,7 @@ async def infer_prompt( await self.ensure_shard(shard) # Tokenize the prompt - tokens = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) + tokens = torch.tensor(self.tokenizer.encode(prompt, return_tensors="pt").input_ids.to(self.device)) # Load the past key values from the inference state if available # past_key_values = self._load_kv_cache(inference_state) From c60d6186762e6461e781fc9c7f7d22cd5d82d9e8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 22:25:02 -0800 Subject: [PATCH 140/589] enable eos_token --- exo/inference/pytorch/inference.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index ebf7dafd9..ac63ad097 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -168,7 +168,11 @@ async def ensure_shard(self, shard: Optional[Shard]): print(f"Loading new shard: {shard}") self.model = ShardedHuggingFaceModel(shard) - self.tokenizer = AutoTokenizer.from_pretrained(shard.model_id) + self.tokenizer = AutoTokenizer.from_pretrained( + shard.model_id, + add_eos_token=True, + use_fast=True + ) self.shard = shard if DEBUG >= 2: From e96d146da15f20176f09a76a5e67341f05899c7a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 8 Aug 2024 22:26:54 -0800 Subject: [PATCH 141/589] token fix --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index ac63ad097..e1fc791a7 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -44,7 +44,7 @@ async def infer_prompt( await self.ensure_shard(shard) # Tokenize the prompt - tokens = torch.tensor(self.tokenizer.encode(prompt, return_tensors="pt").input_ids.to(self.device)) + tokens = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) # Load the past key values from the inference state if available # past_key_values = self._load_kv_cache(inference_state) From 8feae93b261a3d498681eb87d0c96cd5835f187b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 14:45:29 -0800 Subject: [PATCH 142/589] adding more logging to fix infinite infer_tensor issue --- exo/inference/pytorch/inference.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index e1fc791a7..517a22fd0 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -73,6 +73,7 @@ async def infer_prompt( if DEBUG >= 2: print(f"Output data: {output_data} finished: {is_finished}") + print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") with torch.no_grad(): output_npa = np.array(output_data.cpu()) @@ -89,13 +90,18 @@ async def infer_tensor( shard: Shard, input_data: np.ndarray, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + + in_tensor = torch.tensor(input_data) + if DEBUG >= 2: + print(f"input_data: {input_data}\n") + print(f"in_tensor: {in_tensor}\n") # Ensure the shard is loaded await self.ensure_shard(shard) # Run the forward pass through the model layers # output_data, past_key_values - in_tensor = torch.tensor(input_data) + if shard.is_last_layer(): output_data = self.model.norm(in_tensor) # output_data = output_data.flatten() @@ -108,7 +114,8 @@ async def infer_tensor( is_finished = output_data.size == 1 and 
output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 2: - print(f"Output data shape: {output_data.shape}") + print(f"Output data: {output_data} finished: {is_finished}") + print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") with torch.no_grad(): From 8922e0a460b4a04019f2d1259db83b34c1276b23 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 14:52:12 -0800 Subject: [PATCH 143/589] testing if just needing to see if layer is normalized it will be finished --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 517a22fd0..d5a5d8ae8 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -69,7 +69,7 @@ async def infer_prompt( # Save the past key values to the inference state # self._save_kv_cache(past_key_values) - is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] + is_finished = output_data.size == 1 if DEBUG >= 2: print(f"Output data: {output_data} finished: {is_finished}") @@ -111,7 +111,7 @@ async def infer_tensor( # past_key_values=past_key_values ) - is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] + is_finished = output_data.size == 1 if DEBUG >= 2: print(f"Output data: {output_data} finished: {is_finished}") From 65728857cc0165631b5195d8fdb1ab03f2ea6f4f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 14:59:13 -0800 Subject: [PATCH 144/589] checking normalization, adding last layer check to forward --- exo/inference/pytorch/inference.py | 18 ++++-------------- exo/inference/pytorch/model/hf.py | 9 ++++++++- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index d5a5d8ae8..b06ac2164 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -61,11 +61,6 @@ async def infer_prompt( # past_key_values=past_key_values ) - if shard.is_last_layer(): - # output_data = output_data.view(1, -1, 4096) - output_data = self.model.norm(output_data) - # output_data = output_data.flatten() - # Save the past key values to the inference state # self._save_kv_cache(past_key_values) @@ -102,20 +97,15 @@ async def infer_tensor( # Run the forward pass through the model layers # output_data, past_key_values - if shard.is_last_layer(): - output_data = self.model.norm(in_tensor) - # output_data = output_data.flatten() - else: - output_data = self.model.forward_layers( - in_tensor, - # past_key_values=past_key_values - ) + output_data = self.model.forward_layers( + in_tensor, + # past_key_values=past_key_values + ) is_finished = output_data.size == 1 if DEBUG >= 2: print(f"Output data: {output_data} finished: {is_finished}") - print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") with torch.no_grad(): diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 45ee29c5c..a412cb020 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -110,7 +110,14 @@ def forward_layers( print(f"\n[layer {i}] layer_outputs: {layer_outputs[0]}") hidden_states = layer_outputs[0] - # new_past_key_values.append(layer_outputs[1]) + + if DEBUG >= 2: + print(f"is last layer? 
{self.shard.is_last_layer}") + print(f"layer count {self.shard.get_layer_count()}") + + if self.shard.is_last_layer(): + # output_data = output_data.view(1, -1, 4096) + output_data = self.model.norm(hidden_states) return hidden_states # if self.shard.is_last_layer(): From 35b79d2db031a967a62fa0a4d17d991d15bec24c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:02:58 -0800 Subject: [PATCH 145/589] fixing norm utilization, adding logging --- exo/inference/pytorch/model/hf.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index a412cb020..0c7b9c11b 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -72,8 +72,10 @@ def forward_layers( """ if DEBUG >= 2: print(f"forward_layer call\ninput_data: {input_data}") + print(f"1 is last layer? {self.shard.is_last_layer}") + print(f"1 layer count {self.shard.get_layer_count()}") - # flatten to 1d and turn to long + # embed data input_data = self.embed_tokens(input_data) if DEBUG >= 2: print(f"embedded input_data {input_data}") @@ -112,12 +114,12 @@ def forward_layers( hidden_states = layer_outputs[0] if DEBUG >= 2: - print(f"is last layer? {self.shard.is_last_layer}") - print(f"layer count {self.shard.get_layer_count()}") + print(f"2 is last layer? {self.shard.is_last_layer}") + print(f"2 layer count {self.shard.get_layer_count()}") if self.shard.is_last_layer(): # output_data = output_data.view(1, -1, 4096) - output_data = self.model.norm(hidden_states) + return self.norm(hidden_states) return hidden_states # if self.shard.is_last_layer(): From ee8b76b6b846312d285f24380ae452d33cb41cfd Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:08:31 -0800 Subject: [PATCH 146/589] adding logging, checking embed error --- exo/inference/pytorch/inference.py | 2 ++ exo/inference/pytorch/model/hf.py | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index b06ac2164..767dbacea 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -67,6 +67,7 @@ async def infer_prompt( is_finished = output_data.size == 1 if DEBUG >= 2: + print("infer_prompt called") print(f"Output data: {output_data} finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") @@ -88,6 +89,7 @@ async def infer_tensor( in_tensor = torch.tensor(input_data) if DEBUG >= 2: + print("infer_tensor called") print(f"input_data: {input_data}\n") print(f"in_tensor: {in_tensor}\n") diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 0c7b9c11b..e2cbf43cb 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -72,8 +72,7 @@ def forward_layers( """ if DEBUG >= 2: print(f"forward_layer call\ninput_data: {input_data}") - print(f"1 is last layer? {self.shard.is_last_layer}") - print(f"1 layer count {self.shard.get_layer_count()}") + print(f"1 shard {self.shard.to_dict()}") # embed data input_data = self.embed_tokens(input_data) @@ -114,8 +113,8 @@ def forward_layers( hidden_states = layer_outputs[0] if DEBUG >= 2: - print(f"2 is last layer? {self.shard.is_last_layer}") - print(f"2 layer count {self.shard.get_layer_count()}") + print(f"2 is last layer? 
{self.shard.is_last_layer()}") + print(f"2 shard {self.shard.to_dict()}") if self.shard.is_last_layer(): # output_data = output_data.view(1, -1, 4096) From 310e607e67dd0329947cf8ede2e59d31524cf4a6 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:10:42 -0800 Subject: [PATCH 147/589] making it so embed only happens with first layer --- exo/inference/pytorch/model/hf.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index e2cbf43cb..50c9dd262 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -71,13 +71,16 @@ def forward_layers( Note: past_key_values not working for model, might be a library bug """ if DEBUG >= 2: - print(f"forward_layer call\ninput_data: {input_data}") + print("forward_layer call") + print(f"input_data: {input_data}") print(f"1 shard {self.shard.to_dict()}") # embed data - input_data = self.embed_tokens(input_data) - if DEBUG >= 2: - print(f"embedded input_data {input_data}") + if self.shard.is_first_layer(): + input_data = self.embed_tokens(input_data) + + if DEBUG >= 2: + print(f"embedded input_data {input_data}") # Check past key values # if past_key_values is None: From d7a77e9f5e6632ab98027a7c9c380073af5e895e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:13:52 -0800 Subject: [PATCH 148/589] putting in layer loop --- exo/inference/pytorch/model/hf.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 50c9dd262..b2a34d6a7 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -75,13 +75,6 @@ def forward_layers( print(f"input_data: {input_data}") print(f"1 shard {self.shard.to_dict()}") - # embed data - if self.shard.is_first_layer(): - input_data = self.embed_tokens(input_data) - - if DEBUG >= 2: - print(f"embedded input_data {input_data}") - # Check past key values # if past_key_values is None: # past_key_values = [None] * len(self.layers) @@ -98,6 +91,12 @@ def forward_layers( for i, layer in enumerate(self.layers): # Get past key value if available # past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None + + # embed only at first layer + if i == self.shard.start_layer: + input_data = self.embed_tokens(input_data) + if DEBUG >= 2: + print(f"embedded input_data {input_data}") # Forward pass through the layer if DEBUG >= 2: From ba4f9653494d6cdaf4afc4fc0e624d9520751f74 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:16:06 -0800 Subject: [PATCH 149/589] putting in layer loop --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index b2a34d6a7..220c1db03 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -94,9 +94,9 @@ def forward_layers( # embed only at first layer if i == self.shard.start_layer: - input_data = self.embed_tokens(input_data) + hidden_states = self.embed_tokens(hidden_states) if DEBUG >= 2: - print(f"embedded input_data {input_data}") + print(f"embedded hidden_states {hidden_states}") # Forward pass through the layer if DEBUG >= 2: From 8fe37002d0a2c4518a3355785d3f361518359967 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:19:08 -0800 Subject: [PATCH 150/589] putting in layer loop --- 
exo/inference/pytorch/model/hf.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 220c1db03..c3c519941 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -89,19 +89,20 @@ def forward_layers( #new_past_key_values = [] hidden_states = input_data for i, layer in enumerate(self.layers): + # Forward pass through the layer + if DEBUG >= 2: + print(f"\n[layer {i}] {layer}") + print(f"hidden_states {hidden_states}") + # Get past key value if available # past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None # embed only at first layer - if i == self.shard.start_layer: + if i == 0: hidden_states = self.embed_tokens(hidden_states) if DEBUG >= 2: print(f"embedded hidden_states {hidden_states}") - # Forward pass through the layer - if DEBUG >= 2: - print(f"\n[layer {i}] hidden_states {hidden_states}") - layer_outputs = layer( hidden_states, position_ids=position_ids, From 5ecda20ef7dd27fef9d2c007d2b75f6c6fa2becb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:23:13 -0800 Subject: [PATCH 151/589] tensor bug --- exo/inference/pytorch/model/hf.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index c3c519941..8e8c2c58b 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -98,10 +98,9 @@ def forward_layers( # past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None # embed only at first layer - if i == 0: - hidden_states = self.embed_tokens(hidden_states) - if DEBUG >= 2: - print(f"embedded hidden_states {hidden_states}") + hidden_states = self.embed_tokens(hidden_states) + if DEBUG >= 2: + print(f"embedded hidden_states {hidden_states}") layer_outputs = layer( hidden_states, From 57843f1220154e458855e035cc7ca127aa01ab51 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:26:48 -0800 Subject: [PATCH 152/589] adding param to forward layer to check where infer is coming from --- exo/inference/pytorch/inference.py | 2 ++ exo/inference/pytorch/model/hf.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 767dbacea..50cce140b 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -58,6 +58,7 @@ async def infer_prompt( output_data = self.model.forward_layers( tokens, + "prompt" # past_key_values=past_key_values ) @@ -101,6 +102,7 @@ async def infer_tensor( output_data = self.model.forward_layers( in_tensor, + "tensor" # past_key_values=past_key_values ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 8e8c2c58b..e2925594e 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -63,6 +63,7 @@ def __init__(self, shard: Shard): def forward_layers( self, input_data: torch.tensor, + infer_from: str #past_key_values: list ) -> torch.tensor: #-> Tuple[torch.tensor, list]: """ @@ -97,10 +98,14 @@ def forward_layers( # Get past key value if available # past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None - # embed only at first layer - hidden_states = self.embed_tokens(hidden_states) - if DEBUG >= 2: - print(f"embedded hidden_states {hidden_states}") + # embed only at first 
layer and infer prompt + if i == 0 and infer_from == "prompt": + if DEBUG >= 2: + print("first layer and infer_prompt") + + hidden_states = self.embed_tokens(hidden_states) + if DEBUG >= 2: + print(f"embedded hidden_states {hidden_states}") layer_outputs = layer( hidden_states, From 9d52ed7714ae61c1fd0fafb723f66a0bed1316cb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:33:10 -0800 Subject: [PATCH 153/589] infinity processing loop bug --- exo/inference/pytorch/model/hf.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index e2925594e..5627f1cb8 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -99,7 +99,7 @@ def forward_layers( # past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None # embed only at first layer and infer prompt - if i == 0 and infer_from == "prompt": + if self.shard.start_layer == i and infer_from == "prompt": if DEBUG >= 2: print("first layer and infer_prompt") @@ -120,11 +120,9 @@ def forward_layers( hidden_states = layer_outputs[0] if DEBUG >= 2: - print(f"2 is last layer? {self.shard.is_last_layer()}") print(f"2 shard {self.shard.to_dict()}") - if self.shard.is_last_layer(): - # output_data = output_data.view(1, -1, 4096) + if i == self.shard.end_layer: return self.norm(hidden_states) return hidden_states From 1eca0e63ca8332e8db517d7d92c85c9dae0038a7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:42:07 -0800 Subject: [PATCH 154/589] infinity processing loop bug --- exo/inference/pytorch/model/hf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 5627f1cb8..02e6a7f9d 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -123,7 +123,9 @@ def forward_layers( print(f"2 shard {self.shard.to_dict()}") if i == self.shard.end_layer: - return self.norm(hidden_states) + print(f"last layer, normalize hidden states") + hs_norm = self.norm(hidden_states) + return hs_norm return hidden_states # if self.shard.is_last_layer(): From a17bd55cad7faa0ceb51c2b6aa1bdca30e209cd0 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:51:06 -0800 Subject: [PATCH 155/589] trying to flatten --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 02e6a7f9d..cb87d01bf 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -65,7 +65,7 @@ def forward_layers( input_data: torch.tensor, infer_from: str #past_key_values: list - ) -> torch.tensor: #-> Tuple[torch.tensor, list]: + ) -> any: #-> Tuple[torch.tensor, list]: """ Forward pass through the specified layers. 
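
(Orientation note on the sharding flow these patches iterate on.) The commits in this range keep moving the token embedding, the final norm and the lm_head projection between infer_prompt, infer_tensor and forward_layers. Below is a minimal, illustrative sketch of the flow the series converges on: embed only on the shard that owns the first layer, run just this shard's decoder layers, and only on the shard that owns the last layer apply the final norm and lm_head and take the argmax at the last position. The Shard checks, the embed_tokens/norm/lm_head attributes and the layer(hidden_states, position_ids=...) call with layer_outputs[0] are taken from the diffs themselves; the function name, the greedy argmax decode and the duck-typed model/shard parameters are assumptions made for this sketch, not the exo implementation.

import torch

def sharded_forward_sketch(model, shard, input_data: torch.Tensor) -> torch.Tensor:
    # First shard only: token ids -> embeddings; later shards receive hidden states
    hidden_states = model.embed_tokens(input_data) if shard.is_first_layer() else input_data

    # Position ids for this sequence (the earlier patches pass these to each decoder layer)
    position_ids = torch.arange(
        hidden_states.size(1), dtype=torch.long, device=hidden_states.device
    ).unsqueeze(0)

    # Run only the decoder layers owned by this shard
    for layer in model.layers:
        hidden_states = layer(hidden_states, position_ids=position_ids)[0]

    # Last shard only: final norm -> lm_head -> greedy next-token id
    if shard.is_last_layer():
        logits = model.full_model.lm_head(model.norm(hidden_states)).float()
        return torch.argmax(logits[:, -1, :], dim=-1)

    # Otherwise hand the hidden states to the next shard
    return hidden_states

Later commits in this series ("adding position embeddings") start computing rotary position embeddings explicitly and pass position_embeddings to each layer instead; the position_ids form above follows the earlier diffs.
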
@@ -125,7 +125,7 @@ def forward_layers( if i == self.shard.end_layer: print(f"last layer, normalize hidden states") hs_norm = self.norm(hidden_states) - return hs_norm + return hs_norm.flatten() return hidden_states # if self.shard.is_last_layer(): From 3085dbcb435422b4e82151ddf9d557f5a7507a17 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:54:45 -0800 Subject: [PATCH 156/589] trying to flatten --- exo/inference/pytorch/inference.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 50cce140b..1f3d80878 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -69,7 +69,8 @@ async def infer_prompt( if DEBUG >= 2: print("infer_prompt called") - print(f"Output data: {output_data} finished: {is_finished}") + print(f"Output data: {output_data} output_data.size: {output_data.size}") + print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") with torch.no_grad(): From ccea4d216d6fc18f4dfec35d5b3e79164856a0f1 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:56:16 -0800 Subject: [PATCH 157/589] testing size fix --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 1f3d80878..f046929e0 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -65,11 +65,11 @@ async def infer_prompt( # Save the past key values to the inference state # self._save_kv_cache(past_key_values) - is_finished = output_data.size == 1 + is_finished = output_data.size() == 1 if DEBUG >= 2: print("infer_prompt called") - print(f"Output data: {output_data} output_data.size: {output_data.size}") + print(f"Output data: {output_data} output_data.size: {output_data.size()}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") From 36ecb1ec3a15667101848465ea493732c203fdce Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 15:58:47 -0800 Subject: [PATCH 158/589] testing size fix --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index f046929e0..8083f3460 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -65,7 +65,7 @@ async def infer_prompt( # Save the past key values to the inference state # self._save_kv_cache(past_key_values) - is_finished = output_data.size() == 1 + is_finished = len(output_data.size()) == 1 if DEBUG >= 2: print("infer_prompt called") From b928d3e648d9fc57425358542982cd03d3850d0b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:01:41 -0800 Subject: [PATCH 159/589] testing items fix --- exo/inference/pytorch/inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 8083f3460..57a4cf8c0 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -70,6 +70,7 @@ async def infer_prompt( if DEBUG >= 2: print("infer_prompt called") print(f"Output data: {output_data} output_data.size: {output_data.size()}") + print(f"output_data {output_data.items()}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") From 
06b35b957c684466f75aede2aca17321f22e4363 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:03:35 -0800 Subject: [PATCH 160/589] testing items fix --- exo/inference/pytorch/inference.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 57a4cf8c0..1dbdb9fac 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -62,6 +62,9 @@ async def infer_prompt( # past_key_values=past_key_values ) + with torch.no_grad(): + output_npa = np.array(output_data.cpu()) + # Save the past key values to the inference state # self._save_kv_cache(past_key_values) @@ -70,13 +73,10 @@ async def infer_prompt( if DEBUG >= 2: print("infer_prompt called") print(f"Output data: {output_data} output_data.size: {output_data.size()}") - print(f"output_data {output_data.items()}") + print(f"output_data {output_npa.items()}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") - with torch.no_grad(): - output_npa = np.array(output_data.cpu()) - return ( output_npa, "", From 3a0bdba5dcb8381e55020c65853fb66f5b5cc8b2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:05:03 -0800 Subject: [PATCH 161/589] testing items fix --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 1dbdb9fac..396f62b80 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -73,7 +73,7 @@ async def infer_prompt( if DEBUG >= 2: print("infer_prompt called") print(f"Output data: {output_data} output_data.size: {output_data.size()}") - print(f"output_data {output_npa.items()}") + print(f"output_data {output_npa.item()}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") From f58566e1a810ab5feca656b03ee1230bad72ec87 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:06:28 -0800 Subject: [PATCH 162/589] testing items fix --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 396f62b80..9d7cf7aa3 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -73,7 +73,7 @@ async def infer_prompt( if DEBUG >= 2: print("infer_prompt called") print(f"Output data: {output_data} output_data.size: {output_data.size()}") - print(f"output_data {output_npa.item()}") + print(f"output_npa {output_npa}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") From ae576269763a491a0b9cbd6f1a31add30bf5c62b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:09:40 -0800 Subject: [PATCH 163/589] testing items fix --- exo/inference/pytorch/inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 9d7cf7aa3..eba51c8a0 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -73,6 +73,7 @@ async def infer_prompt( if DEBUG >= 2: print("infer_prompt called") print(f"Output data: {output_data} output_data.size: {output_data.size()}") + print(f"output_data {output_data.item()}") print(f"output_npa {output_npa}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id 
{self.tokenizer.eos_token_id}") From a6cdd6bcf593ee523836f476bac4b9d3de46031e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:14:04 -0800 Subject: [PATCH 164/589] testing items fix --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index eba51c8a0..1ca2131e4 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -73,7 +73,7 @@ async def infer_prompt( if DEBUG >= 2: print("infer_prompt called") print(f"Output data: {output_data} output_data.size: {output_data.size()}") - print(f"output_data {output_data.item()}") + print(f"output_data {output_data.squeeze().item()}") print(f"output_npa {output_npa}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") From 749c9547a6f0efa0467b0b2a8da725761574284d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:20:04 -0800 Subject: [PATCH 165/589] testing items fix --- exo/inference/pytorch/model/hf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index cb87d01bf..4d5a165a5 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -75,6 +75,7 @@ def forward_layers( print("forward_layer call") print(f"input_data: {input_data}") print(f"1 shard {self.shard.to_dict()}") + print(f"1 is_last_layer {self.shard.is_first_layer()}") # Check past key values # if past_key_values is None: From c8e59a7a2b574ac8c0410d174529b916568b0363 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:22:47 -0800 Subject: [PATCH 166/589] testing items fix --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 4d5a165a5..0be2e267b 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -75,7 +75,6 @@ def forward_layers( print("forward_layer call") print(f"input_data: {input_data}") print(f"1 shard {self.shard.to_dict()}") - print(f"1 is_last_layer {self.shard.is_first_layer()}") # Check past key values # if past_key_values is None: @@ -128,6 +127,7 @@ def forward_layers( hs_norm = self.norm(hidden_states) return hs_norm.flatten() + print(f"1 is_last_layer {self.shard.is_last_layer()}") return hidden_states # if self.shard.is_last_layer(): # logits = self.full_model.model.norm(hidden_states) From 1d68267e7ca61566f083f395c479d4e2e216e767 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:24:44 -0800 Subject: [PATCH 167/589] testing items fix --- exo/inference/pytorch/model/hf.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 0be2e267b..865da29bd 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -119,15 +119,12 @@ def forward_layers( hidden_states = layer_outputs[0] - if DEBUG >= 2: - print(f"2 shard {self.shard.to_dict()}") - - if i == self.shard.end_layer: - print(f"last layer, normalize hidden states") - hs_norm = self.norm(hidden_states) - return hs_norm.flatten() + # if i == self.shard.end_layer: + # print(f"last layer, normalize hidden states") + # hs_norm = self.norm(hidden_states) + # return hs_norm.flatten() - print(f"1 is_last_layer {self.shard.is_last_layer()}") + print(f"2 is_last_layer 
{self.shard.is_last_layer()}") return hidden_states # if self.shard.is_last_layer(): # logits = self.full_model.model.norm(hidden_states) From 026bbd2f97f15d3cb1323c8b54beacd0410026bc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:28:13 -0800 Subject: [PATCH 168/589] testing items fix --- exo/inference/pytorch/model/hf.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 865da29bd..fc2ef8946 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -89,6 +89,12 @@ def forward_layers( #new_past_key_values = [] hidden_states = input_data + + if self.shard.is_first_layer(): + hidden_states = self.embed_tokens(hidden_states) + if DEBUG >= 2: + print(f"embedded hidden_states {hidden_states}") + for i, layer in enumerate(self.layers): # Forward pass through the layer if DEBUG >= 2: @@ -99,13 +105,13 @@ def forward_layers( # past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None # embed only at first layer and infer prompt - if self.shard.start_layer == i and infer_from == "prompt": - if DEBUG >= 2: - print("first layer and infer_prompt") + # if self.shard.start_layer == i and infer_from == "prompt": + # if DEBUG >= 2: + # print("first layer and infer_prompt") - hidden_states = self.embed_tokens(hidden_states) - if DEBUG >= 2: - print(f"embedded hidden_states {hidden_states}") + # hidden_states = self.embed_tokens(hidden_states) + # if DEBUG >= 2: + # print(f"embedded hidden_states {hidden_states}") layer_outputs = layer( hidden_states, @@ -119,12 +125,11 @@ def forward_layers( hidden_states = layer_outputs[0] - # if i == self.shard.end_layer: - # print(f"last layer, normalize hidden states") - # hs_norm = self.norm(hidden_states) - # return hs_norm.flatten() - print(f"2 is_last_layer {self.shard.is_last_layer()}") + if self.shard.is_last_layer(): + hs_norm = self.norm(hidden_states) + return hs_norm.flatten() + return hidden_states # if self.shard.is_last_layer(): # logits = self.full_model.model.norm(hidden_states) From da79891623901646e8d37d547ea80e9c6343e116 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:31:56 -0800 Subject: [PATCH 169/589] testing items fix --- exo/inference/pytorch/inference.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 1ca2131e4..5e1cad067 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -70,13 +70,13 @@ async def infer_prompt( is_finished = len(output_data.size()) == 1 - if DEBUG >= 2: - print("infer_prompt called") - print(f"Output data: {output_data} output_data.size: {output_data.size()}") - print(f"output_data {output_data.squeeze().item()}") - print(f"output_npa {output_npa}") - print(f"finished: {is_finished}") - print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") + # if DEBUG >= 2: + # print("infer_prompt called") + # print(f"Output data: {output_data} output_data.size: {output_data.size()}") + # print(f"output_data {output_data.squeeze().item()}") + # print(f"output_npa {output_npa}") + # print(f"finished: {is_finished}") + # print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") return ( output_npa, From 6370f65d4f30c21dc52af61c64dabe8be451d3ad Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:35:36 -0800 Subject: [PATCH 
170/589] testing items fix --- exo/inference/pytorch/inference.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 5e1cad067..6e8536ef7 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -39,6 +39,8 @@ async def infer_prompt( image_str: Optional[str] = None, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: + if DEBUG >= 2: + print("infer_prompt called") # Ensure the shard is loaded await self.ensure_shard(shard) @@ -65,6 +67,9 @@ async def infer_prompt( with torch.no_grad(): output_npa = np.array(output_data.cpu()) + if DEBUG >= 2: + print(f"output_npa.size: {output_npa.size}") + # Save the past key values to the inference state # self._save_kv_cache(past_key_values) From e8411aeafc4815e15bf8933c403c1dd69a9dbc3d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:39:59 -0800 Subject: [PATCH 171/589] sending norm to lm_head and output to float --- exo/inference/pytorch/model/hf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index fc2ef8946..3161bc691 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -128,7 +128,8 @@ def forward_layers( print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): hs_norm = self.norm(hidden_states) - return hs_norm.flatten() + hs_lm_head = self.full_model.lm_head(hs_norm) + return hs_lm_head.float() return hidden_states # if self.shard.is_last_layer(): From 254d7ac1ec319e8520f6004574c052678c2d8fd2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:43:21 -0800 Subject: [PATCH 172/589] sending norm to lm_head and output to float --- exo/inference/pytorch/model/hf.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 3161bc691..8c23a61b8 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -128,8 +128,13 @@ def forward_layers( print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): hs_norm = self.norm(hidden_states) - hs_lm_head = self.full_model.lm_head(hs_norm) - return hs_lm_head.float() + hs_lm_head = self.full_model.lm_head(hs_norm).float() + + if DEBUG >= 2: + print(f"hs_norm: {hs_norm}") + print(f"hs_lm_head: {hs_lm_head}") + + return hs_lm_head return hidden_states # if self.shard.is_last_layer(): From f20c7f81d831738c895b3f7bb0c5f6efa2466357 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 16:55:09 -0800 Subject: [PATCH 173/589] sending norm to lm_head and output to float --- exo/inference/pytorch/inference.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 6e8536ef7..fc294d167 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -67,8 +67,9 @@ async def infer_prompt( with torch.no_grad(): output_npa = np.array(output_data.cpu()) - if DEBUG >= 2: - print(f"output_npa.size: {output_npa.size}") + if DEBUG >= 2: + print(f"output_data.size(): {output_data.size()}") + print(f"output_npa.size: {output_npa.size}") # Save the past key values to the inference state # self._save_kv_cache(past_key_values) From fb5dff2fbb99fd5570d66463d42d53067e2e32f2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 
2024 16:56:42 -0800 Subject: [PATCH 174/589] sending norm to lm_head and output to float --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 8c23a61b8..c3335628c 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -128,7 +128,7 @@ def forward_layers( print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): hs_norm = self.norm(hidden_states) - hs_lm_head = self.full_model.lm_head(hs_norm).float() + hs_lm_head = self.full_model.lm_head(hs_norm).float().flatten() if DEBUG >= 2: print(f"hs_norm: {hs_norm}") From a6a0c2ba83d5c4bdf3ed66f77f4f5866277c7d73 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 17:18:43 -0800 Subject: [PATCH 175/589] sending norm to lm_head and output to float --- exo/inference/pytorch/inference.py | 12 +++---- exo/inference/pytorch/model/hf.py | 55 ++---------------------------- 2 files changed, 8 insertions(+), 59 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index fc294d167..a28d62a96 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -56,11 +56,8 @@ async def infer_prompt( if DEBUG >= 2: print(f"tokens: {tokens}\n") - - output_data = self.model.forward_layers( - tokens, - "prompt" + tokens # past_key_values=past_key_values ) @@ -69,12 +66,14 @@ async def infer_prompt( if DEBUG >= 2: print(f"output_data.size(): {output_data.size()}") + + print(f"output_npa: {output_npa}") print(f"output_npa.size: {output_npa.size}") # Save the past key values to the inference state # self._save_kv_cache(past_key_values) - is_finished = len(output_data.size()) == 1 + is_finished = output_npa.size == 1 # if DEBUG >= 2: # print("infer_prompt called") @@ -110,8 +109,7 @@ async def infer_tensor( # output_data, past_key_values output_data = self.model.forward_layers( - in_tensor, - "tensor" + in_tensor # past_key_values=past_key_values ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index c3335628c..92f11d2c5 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -38,34 +38,10 @@ def __init__(self, shard: Shard): self.embed_tokens = self.full_model.model.embed_tokens self.norm = self.full_model.model.norm - # def prefill(self, tokens: list[int], start_pos: int=0) -> int: - # print(f"\nprefill called") - # """ - # Process the initial input tokens and set up the initial hidden states. - # """ - # # Assuming tokens is a 1D tensor of token IDs - # for token in tokens: - # # Convert token to a tensor and get embeddings - # token_tensor = torch.tensor([[token]], device=self.device) - # token_tensor = self.embed_tokens(token_tensor) - - # if DEBUG >= 2: - # print(f"\ntoken_tensor shape: {token_tensor.shape}") - - # # Prefill with tokens - # self.forward_layers(start_pos, token_tensor, None) - - # # Increment start position - # start_pos += 1 - - # return start_pos - def forward_layers( self, - input_data: torch.tensor, - infer_from: str - #past_key_values: list - ) -> any: #-> Tuple[torch.tensor, list]: + input_data: torch.tensor + ) -> any: """ Forward pass through the specified layers. 
@@ -76,10 +52,6 @@ def forward_layers( print(f"input_data: {input_data}") print(f"1 shard {self.shard.to_dict()}") - # Check past key values - # if past_key_values is None: - # past_key_values = [None] * len(self.layers) - # Initialize position_ids position_ids = torch.arange( input_data.size(1), @@ -87,7 +59,6 @@ def forward_layers( device=self.device ).unsqueeze(0) - #new_past_key_values = [] hidden_states = input_data if self.shard.is_first_layer(): @@ -100,24 +71,10 @@ def forward_layers( if DEBUG >= 2: print(f"\n[layer {i}] {layer}") print(f"hidden_states {hidden_states}") - - # Get past key value if available - # past_key_value = past_key_values[i] if past_key_values and len(past_key_values) > 0 else None - - # embed only at first layer and infer prompt - # if self.shard.start_layer == i and infer_from == "prompt": - # if DEBUG >= 2: - # print("first layer and infer_prompt") - - # hidden_states = self.embed_tokens(hidden_states) - # if DEBUG >= 2: - # print(f"embedded hidden_states {hidden_states}") layer_outputs = layer( hidden_states, - position_ids=position_ids, - # past_key_value=past_key_value, - # use_cache=True + position_ids=position_ids ) if DEBUG >= 2: @@ -137,9 +94,3 @@ def forward_layers( return hs_lm_head return hidden_states - # if self.shard.is_last_layer(): - # logits = self.full_model.model.norm(hidden_states) - # return logits.flatten() #, new_past_key_values - # else: - # return hidden_states#, new_past_key_values - From b24046f0ac41ee4f00c8c67c293c071e17d70b6b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 17:20:28 -0800 Subject: [PATCH 176/589] finish issue --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 92f11d2c5..08f9ca2c4 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -91,6 +91,6 @@ def forward_layers( print(f"hs_norm: {hs_norm}") print(f"hs_lm_head: {hs_lm_head}") - return hs_lm_head + return (hs_lm_head, hidden_states) return hidden_states From 8687fe62a435f576683bd33ba287d985bcaee510 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 17:25:16 -0800 Subject: [PATCH 177/589] finish issue --- exo/inference/pytorch/inference.py | 2 +- exo/inference/pytorch/model/hf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index a28d62a96..707aa53b7 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -62,7 +62,7 @@ async def infer_prompt( ) with torch.no_grad(): - output_npa = np.array(output_data.cpu()) + output_npa = np.array([output_data.tolist()]) if DEBUG >= 2: print(f"output_data.size(): {output_data.size()}") diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 08f9ca2c4..92f11d2c5 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -91,6 +91,6 @@ def forward_layers( print(f"hs_norm: {hs_norm}") print(f"hs_lm_head: {hs_lm_head}") - return (hs_lm_head, hidden_states) + return hs_lm_head return hidden_states From 454b205dc5230fdb56297336a4fb7d1037024910 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 17:40:28 -0800 Subject: [PATCH 178/589] finish issue --- exo/inference/pytorch/model/hf.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 
92f11d2c5..62db005ba 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -85,12 +85,14 @@ def forward_layers( print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): hs_norm = self.norm(hidden_states) - hs_lm_head = self.full_model.lm_head(hs_norm).float().flatten() + hs_lm_head = self.full_model.lm_head(hs_norm).float() + output_token = torch.argmax(hs_lm_head, dim=-1).cpu().numpy().flatten() if DEBUG >= 2: print(f"hs_norm: {hs_norm}") print(f"hs_lm_head: {hs_lm_head}") + print(f"output_token: {output_token}") - return hs_lm_head + return output_token return hidden_states From fb6c43d615bf978c1906ea67f2ee9359bf128946 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 17:43:32 -0800 Subject: [PATCH 179/589] fixing output data to numpy, logits finish --- exo/inference/pytorch/inference.py | 35 +++--------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 707aa53b7..7be2ca3cb 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -10,16 +10,12 @@ from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel from exo.helpers import DEBUG -# Default settings -TEMPERATURE = 0.7 -TOP_K = 50 - class PyTorchDynamicShardInferenceEngine(InferenceEngine): """ PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. """ - def __init__(self, debug: bool = False): + def __init__(self): """ Initialize the inference engine. @@ -48,43 +44,18 @@ async def infer_prompt( # Tokenize the prompt tokens = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) - # Load the past key values from the inference state if available - # past_key_values = self._load_kv_cache(inference_state) - # Run the forward pass through the model layers - # output_data, past_key_values if DEBUG >= 2: print(f"tokens: {tokens}\n") output_data = self.model.forward_layers( tokens - # past_key_values=past_key_values ) - with torch.no_grad(): - output_npa = np.array([output_data.tolist()]) - - if DEBUG >= 2: - print(f"output_data.size(): {output_data.size()}") - - print(f"output_npa: {output_npa}") - print(f"output_npa.size: {output_npa.size}") - - # Save the past key values to the inference state - # self._save_kv_cache(past_key_values) - - is_finished = output_npa.size == 1 - - # if DEBUG >= 2: - # print("infer_prompt called") - # print(f"Output data: {output_data} output_data.size: {output_data.size()}") - # print(f"output_data {output_data.squeeze().item()}") - # print(f"output_npa {output_npa}") - # print(f"finished: {is_finished}") - # print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") + is_finished = output_data.size == 1 return ( - output_npa, + output_data, "", is_finished ) From b6bec5441f65ad9959dcc0f353a5ec04e394658b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 17:44:41 -0800 Subject: [PATCH 180/589] fixing debug flag issue --- exo/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/helpers.py b/exo/helpers.py index b811a0f95..64940f083 100644 --- a/exo/helpers.py +++ b/exo/helpers.py @@ -45,7 +45,7 @@ def get_inference_engine(inference_engine_name): elif inference_engine_name == "pytorch": # will change from debug being true after testing from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine - return 
PyTorchDynamicShardInferenceEngine(debug=os.getenv("PYTORCH_DEBUG", default=True)) + return PyTorchDynamicShardInferenceEngine() else: raise ValueError(f"Inference engine {inference_engine_name} not supported") From b85fdecee99065e98cc66c1e792da27d106579e6 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 17:54:48 -0800 Subject: [PATCH 181/589] fixing debug flag issue --- exo/inference/pytorch/model/hf.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 62db005ba..c3a72cbc8 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -54,7 +54,7 @@ def forward_layers( # Initialize position_ids position_ids = torch.arange( - input_data.size(1), + input_data.size(1) if input_data.size > 1 else input_data, dtype=torch.long, device=self.device ).unsqueeze(0) @@ -85,14 +85,14 @@ def forward_layers( print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): hs_norm = self.norm(hidden_states) - hs_lm_head = self.full_model.lm_head(hs_norm).float() - output_token = torch.argmax(hs_lm_head, dim=-1).cpu().numpy().flatten() + hs_lm_head = self.full_model.lm_head(hs_norm).float()[:, -1, :] + # output_token = .cpu().numpy().flatten() if DEBUG >= 2: print(f"hs_norm: {hs_norm}") print(f"hs_lm_head: {hs_lm_head}") - print(f"output_token: {output_token}") + # print(f"output_token: {output_token}") - return output_token + return hs_lm_head.cpu().numpy() - return hidden_states + return hidden_states.cpu().numpy() From b075d0f4cdca94ac32109f3a72e3b84609e69783 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 18:33:26 -0800 Subject: [PATCH 182/589] adding position embeddings --- exo/inference/pytorch/model/hf.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index c3a72cbc8..a7e443f12 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -54,7 +54,7 @@ def forward_layers( # Initialize position_ids position_ids = torch.arange( - input_data.size(1) if input_data.size > 1 else input_data, + input_data, dtype=torch.long, device=self.device ).unsqueeze(0) @@ -63,8 +63,11 @@ def forward_layers( if self.shard.is_first_layer(): hidden_states = self.embed_tokens(hidden_states) + position_embeddings = self.rotary_emb(hidden_states) + if DEBUG >= 2: print(f"embedded hidden_states {hidden_states}") + print(f"position_ids: {self.position_embeddings}") for i, layer in enumerate(self.layers): # Forward pass through the layer @@ -74,7 +77,7 @@ def forward_layers( layer_outputs = layer( hidden_states, - position_ids=position_ids + position_embeddings=position_embeddings ) if DEBUG >= 2: From fe604f4d6c81652cbb675713e8ad761e2750397a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 18:34:48 -0800 Subject: [PATCH 183/589] adding position embeddings --- exo/inference/pytorch/model/hf.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index a7e443f12..8d757ec17 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -52,13 +52,6 @@ def forward_layers( print(f"input_data: {input_data}") print(f"1 shard {self.shard.to_dict()}") - # Initialize position_ids - position_ids = torch.arange( - input_data, - dtype=torch.long, - device=self.device - ).unsqueeze(0) - hidden_states = input_data if 
self.shard.is_first_layer(): From 9ff520030d745e991b0d215325aeff6cfcef7235 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 18:36:30 -0800 Subject: [PATCH 184/589] adding position embeddings --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 8d757ec17..fdf4e76a2 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -56,7 +56,7 @@ def forward_layers( if self.shard.is_first_layer(): hidden_states = self.embed_tokens(hidden_states) - position_embeddings = self.rotary_emb(hidden_states) + position_embeddings = self.full_model.model.rotary_emb(hidden_states) if DEBUG >= 2: print(f"embedded hidden_states {hidden_states}") From 955503671207c4d90efc2d67afe0f7c06e4fcef2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 18:41:01 -0800 Subject: [PATCH 185/589] adding position embeddings --- exo/inference/pytorch/model/hf.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index fdf4e76a2..b5e325cd5 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -56,7 +56,14 @@ def forward_layers( if self.shard.is_first_layer(): hidden_states = self.embed_tokens(hidden_states) - position_embeddings = self.full_model.model.rotary_emb(hidden_states) + + batch_size, seq_len = input_data.size() + position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) + + position_embeddings = self.full_model.model.rotary_emb( + hidden_states, + position_ids + ) if DEBUG >= 2: print(f"embedded hidden_states {hidden_states}") From e76c17420b7855a1b54a098cb30b70bbe1603bd7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 18:43:54 -0800 Subject: [PATCH 186/589] adding position embeddings --- exo/inference/pytorch/model/hf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index b5e325cd5..14c9684f0 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -53,6 +53,8 @@ def forward_layers( print(f"1 shard {self.shard.to_dict()}") hidden_states = input_data + position_ids = None + position_embeddings = None if self.shard.is_first_layer(): hidden_states = self.embed_tokens(hidden_states) From 2d94e60bb30c4d31d2ac1013a6329297e55ba0ce Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 18:45:43 -0800 Subject: [PATCH 187/589] adding position embeddings --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 14c9684f0..e6de34368 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -69,7 +69,7 @@ def forward_layers( if DEBUG >= 2: print(f"embedded hidden_states {hidden_states}") - print(f"position_ids: {self.position_embeddings}") + print(f"position_ids: {position_embeddings}") for i, layer in enumerate(self.layers): # Forward pass through the layer From 764b8a727a521abbe4600233d6b9731dfe9cc2cd Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 18:48:10 -0800 Subject: [PATCH 188/589] adding position embeddings --- exo/inference/pytorch/model/hf.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py 
b/exo/inference/pytorch/model/hf.py index e6de34368..6e2686419 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -97,7 +97,9 @@ def forward_layers( print(f"hs_norm: {hs_norm}") print(f"hs_lm_head: {hs_lm_head}") # print(f"output_token: {output_token}") - - return hs_lm_head.cpu().numpy() + with torch.no_grad(): + last_state = hs_lm_head.cpu().numpy() + + return last_state return hidden_states.cpu().numpy() From 9733e8ab45d4acd5860074406073ffda5615c66c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 19:02:53 -0800 Subject: [PATCH 189/589] adding sampling --- exo/inference/pytorch/inference.py | 4 +++ exo/inference/pytorch/model/hf.py | 56 +++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 7be2ca3cb..1a560c0a3 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -52,6 +52,10 @@ async def infer_prompt( tokens ) + if DEBUG >= 2: + print(f"output_data: {output_data}\n") + print(f"output_data.size {output_data.size}\n") + is_finished = output_data.size == 1 return ( diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 6e2686419..c139cf723 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -4,6 +4,46 @@ from exo.helpers import DEBUG from typing import Tuple +def sample_logits(logits, temp=0.85, top_k=25, top_p=0.9, alpha_f=0.1, alpha_p=0.0): + # Apply temperature scaling + if temp > 0: + logits = logits / temp + + # Top-k sampling + if top_k > 0: + top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) + logits = torch.full_like(logits, -float('inf')) + logits.scatter_(-1, top_k_indices, top_k_values) + + # Top-p (nucleus) sampling + if 0 < top_p < 1.0: + sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) + cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) + + # Remove tokens with cumulative probability above the threshold + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() + sorted_indices_to_remove[..., 0] = 0 + + indices_to_remove = sorted_indices_to_remove.scatter(-1, sorted_indices, sorted_indices_to_remove) + logits = logits.masked_fill(indices_to_remove, -float('inf')) + + # Alpha sampling (to discourage repetition) + if alpha_f or alpha_p: + if not hasattr(sample_logits, "alpha_counter"): + setattr(sample_logits, "alpha_counter", torch.zeros_like(logits, dtype=torch.int32).contiguous()) + logits = logits - (sample_logits.alpha_counter * alpha_f + (sample_logits.alpha_counter > 0) * alpha_p) + + # Sample from the logits + probabilities = F.softmax(logits, dim=-1) + sampled_token = torch.multinomial(probabilities, 1) + + # Update alpha counter + if alpha_f or alpha_p: + sample_logits.alpha_counter = (torch.arange(probabilities.numel(), device=logits.device) == sampled_token).where(sample_logits.alpha_counter + 1, sample_logits.alpha_counter) + + return sampled_token + class ShardedHuggingFaceModel(torch.nn.Module): def __init__(self, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() @@ -90,16 +130,16 @@ def forward_layers( print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): hs_norm = self.norm(hidden_states) - hs_lm_head = self.full_model.lm_head(hs_norm).float()[:, -1, :] - # output_token = .cpu().numpy().flatten() - 
+ hs_lm_head = self.full_model.lm_head(hs_norm).float() + + # Use the sampling function with default settings + output_token = sample_logits(hs_lm_head).cpu().numpy().flatten() + if DEBUG >= 2: print(f"hs_norm: {hs_norm}") print(f"hs_lm_head: {hs_lm_head}") - # print(f"output_token: {output_token}") - with torch.no_grad(): - last_state = hs_lm_head.cpu().numpy() - - return last_state + print(f"output_token: {output_token}") + + return output_token return hidden_states.cpu().numpy() From 304659604b3c20a10109ec10cfd59aace075ffab Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 19:04:29 -0800 Subject: [PATCH 190/589] fix import --- exo/inference/pytorch/model/hf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index c139cf723..88c78ea5a 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,4 +1,5 @@ import torch +from torch.nn import functional as F from transformers import AutoModelForCausalLM from exo.inference.shard import Shard from exo.helpers import DEBUG From 2a21d8ce13fc12856094ea681ca4318260bf8cdb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 19:06:53 -0800 Subject: [PATCH 191/589] lm_head fix --- exo/inference/pytorch/model/hf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 88c78ea5a..8bec7803d 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -134,7 +134,8 @@ def forward_layers( hs_lm_head = self.full_model.lm_head(hs_norm).float() # Use the sampling function with default settings - output_token = sample_logits(hs_lm_head).cpu().numpy().flatten() + output_token = sample_logits( + hs_lm_head[:, -1, :]).cpu().numpy().flatten() if DEBUG >= 2: print(f"hs_norm: {hs_norm}") From adf1cf38b83f06982590882b7107ab4f5b0264a5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 19:36:41 -0800 Subject: [PATCH 192/589] sample fix --- exo/inference/pytorch/model/hf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 8bec7803d..668537c69 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -41,7 +41,9 @@ def sample_logits(logits, temp=0.85, top_k=25, top_p=0.9, alpha_f=0.1, alpha_p=0 # Update alpha counter if alpha_f or alpha_p: - sample_logits.alpha_counter = (torch.arange(probabilities.numel(), device=logits.device) == sampled_token).where(sample_logits.alpha_counter + 1, sample_logits.alpha_counter) + condition = (torch.arange(probabilities.numel(), device=logits.device) == sampled_token) + condition = condition.bool() # Convert condition to boolean tensor + sample_logits.alpha_counter = torch.where(condition, sample_logits.alpha_counter + 1, sample_logits.alpha_counter) return sampled_token From 40f88d45a5da001f8f2797af3cc5a0e3f6e25875 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 19:49:01 -0800 Subject: [PATCH 193/589] eos fix --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 1a560c0a3..ff1a3e97c 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -56,7 +56,7 @@ async def infer_prompt( print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") - 
is_finished = output_data.size == 1 + is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] return ( output_data, @@ -88,7 +88,7 @@ async def infer_tensor( # past_key_values=past_key_values ) - is_finished = output_data.size == 1 + is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 2: print(f"Output data: {output_data} finished: {is_finished}") From 930944efd5f8446973fb988fc900c2d1ad808a9c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 19:59:26 -0800 Subject: [PATCH 194/589] eos fix --- exo/inference/pytorch/inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index ff1a3e97c..de789586c 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -55,6 +55,7 @@ async def infer_prompt( if DEBUG >= 2: print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") + print(f"output_data.item() {output_data.item()}") is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] From d719d8e9359872768aa18f1c9841cd7048e26007 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:01:37 -0800 Subject: [PATCH 195/589] eos fix --- exo/inference/pytorch/inference.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index de789586c..de73831af 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -72,6 +72,9 @@ async def infer_tensor( input_data: np.ndarray, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + if input_data.dim() == 1: + input_data = input_data.unsqueeze(0) + in_tensor = torch.tensor(input_data) if DEBUG >= 2: print("infer_tensor called") From 500f85e72fc264260fc7868841a381e3d3759636 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:02:53 -0800 Subject: [PATCH 196/589] eos fix --- exo/inference/pytorch/inference.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index de73831af..84ac30369 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -71,11 +71,11 @@ async def infer_tensor( shard: Shard, input_data: np.ndarray, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: - - if input_data.dim() == 1: - input_data = input_data.unsqueeze(0) - + in_tensor = torch.tensor(input_data) + if in_tensor.dim() == 1: + in_tensor = in_tensor.unsqueeze(0) + if DEBUG >= 2: print("infer_tensor called") print(f"input_data: {input_data}\n") From 03d4e866e72659aa67ce6bc56170138bcb3f890d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:06:43 -0800 Subject: [PATCH 197/589] eos fix --- exo/inference/pytorch/inference.py | 10 ++-------- exo/inference/pytorch/model/hf.py | 3 ++- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 84ac30369..e4dc7fe78 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -73,9 +73,7 @@ async def infer_tensor( inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: in_tensor = torch.tensor(input_data) - if in_tensor.dim() == 1: - in_tensor = in_tensor.unsqueeze(0) - + if DEBUG >= 2: print("infer_tensor called") print(f"input_data: 
{input_data}\n") @@ -97,12 +95,8 @@ async def infer_tensor( if DEBUG >= 2: print(f"Output data: {output_data} finished: {is_finished}") - - with torch.no_grad(): - output_npa = np.array(output_data.cpu()) - return ( - output_npa, + output_data, "", is_finished ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 668537c69..0aeac4e39 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,4 +1,5 @@ import torch +import numpy as np from torch.nn import functional as F from transformers import AutoModelForCausalLM from exo.inference.shard import Shard @@ -84,7 +85,7 @@ def __init__(self, shard: Shard): def forward_layers( self, input_data: torch.tensor - ) -> any: + ) -> np.ndarray: """ Forward pass through the specified layers. From b2814b45fe8e9176026d45bb4242bf2017f136f5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:17:05 -0800 Subject: [PATCH 198/589] eos fix --- exo/inference/pytorch/inference.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index e4dc7fe78..ffde44720 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -73,7 +73,7 @@ async def infer_tensor( inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: in_tensor = torch.tensor(input_data) - + if DEBUG >= 2: print("infer_tensor called") print(f"input_data: {input_data}\n") @@ -93,7 +93,8 @@ async def infer_tensor( is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 2: - print(f"Output data: {output_data} finished: {is_finished}") + print(f"finished: {is_finished}") + print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") return ( output_data, From a55c9a39fb70240241b7f64dfae77d80602ebd35 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:18:01 -0800 Subject: [PATCH 199/589] eos fix --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 0aeac4e39..1419b30cc 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -103,7 +103,7 @@ def forward_layers( if self.shard.is_first_layer(): hidden_states = self.embed_tokens(hidden_states) - batch_size, seq_len = input_data.size() + batch_size, seq_len = hidden_states.size() position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) position_embeddings = self.full_model.model.rotary_emb( From ca6734e7860c0fd94a0819aaf5e37aa7eb2e8952 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:21:11 -0800 Subject: [PATCH 200/589] is finished issue --- exo/inference/pytorch/inference.py | 4 ++-- exo/inference/pytorch/model/hf.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index ffde44720..4869117fe 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -57,7 +57,7 @@ async def infer_prompt( print(f"output_data.size {output_data.size}\n") print(f"output_data.item() {output_data.item()}") - is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] + is_finished = output_data[-1] == self.tokenizer.eos_token_id and output_data.size == 1 
return ( output_data, @@ -90,7 +90,7 @@ async def infer_tensor( # past_key_values=past_key_values ) - is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] + is_finished = output_data[-1] == self.tokenizer.eos_token_id and output_data.size == 1 if DEBUG >= 2: print(f"finished: {is_finished}") diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 1419b30cc..0aeac4e39 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -103,7 +103,7 @@ def forward_layers( if self.shard.is_first_layer(): hidden_states = self.embed_tokens(hidden_states) - batch_size, seq_len = hidden_states.size() + batch_size, seq_len = input_data.size() position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) position_embeddings = self.full_model.model.rotary_emb( From 609c9199bdeafe176c1166a1e3b5efb3d7728ea0 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:27:10 -0800 Subject: [PATCH 201/589] is finished issue --- exo/inference/pytorch/inference.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 4869117fe..a083ef8c5 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -56,6 +56,10 @@ async def infer_prompt( print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") print(f"output_data.item() {output_data.item()}") + print(f"finished: {is_finished}") + print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") + print(f"output_data[-1] {output_data[-1]}") + print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") is_finished = output_data[-1] == self.tokenizer.eos_token_id and output_data.size == 1 From b47932cff02b817b8a46a1adb5af351eb2548dda Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:28:27 -0800 Subject: [PATCH 202/589] is finished issue --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index a083ef8c5..3030972aa 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -52,6 +52,8 @@ async def infer_prompt( tokens ) + is_finished = output_data[-1] == self.tokenizer.eos_token_id and output_data.size == 1 + if DEBUG >= 2: print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") @@ -61,8 +63,6 @@ async def infer_prompt( print(f"output_data[-1] {output_data[-1]}") print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") - is_finished = output_data[-1] == self.tokenizer.eos_token_id and output_data.size == 1 - return ( output_data, "", From c878b2415a754145c391c4840c2a95b8fa827e44 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:44:10 -0800 Subject: [PATCH 203/589] is finished issue --- exo/inference/pytorch/model/hf.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 0aeac4e39..56cc5fbe1 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -84,8 +84,9 @@ def __init__(self, shard: Shard): def forward_layers( self, - input_data: torch.tensor - ) -> np.ndarray: + input_data: torch.tensor, + 
past_kvs = [] + ) -> Tuple[np.ndarray, list]: """ Forward pass through the specified layers. @@ -99,6 +100,7 @@ def forward_layers( hidden_states = input_data position_ids = None position_embeddings = None + present_kvs = [] if self.shard.is_first_layer(): hidden_states = self.embed_tokens(hidden_states) @@ -123,13 +125,16 @@ def forward_layers( layer_outputs = layer( hidden_states, - position_embeddings=position_embeddings + position_embeddings=position_embeddings, + past_key_values=past_kvs[i] if past_kvs else None, + use_cache=True ) if DEBUG >= 2: print(f"\n[layer {i}] layer_outputs: {layer_outputs[0]}") hidden_states = layer_outputs[0] + present_kvs = layer_outputs[1] print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): @@ -145,6 +150,6 @@ def forward_layers( print(f"hs_lm_head: {hs_lm_head}") print(f"output_token: {output_token}") - return output_token + return (output_token, present_kvs) - return hidden_states.cpu().numpy() + return (hidden_states.cpu().numpy(), present_kvs) From e006edd2aa37b0e27b43ca128d3e74c16ff4b634 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:47:21 -0800 Subject: [PATCH 204/589] working on caching --- exo/inference/pytorch/model/hf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 56cc5fbe1..c7f9aa613 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -122,6 +122,7 @@ def forward_layers( if DEBUG >= 2: print(f"\n[layer {i}] {layer}") print(f"hidden_states {hidden_states}") + print(f"past_kvs {past_kvs}") layer_outputs = layer( hidden_states, @@ -136,6 +137,9 @@ def forward_layers( hidden_states = layer_outputs[0] present_kvs = layer_outputs[1] + if DEBUG >= 2: + print(f"present_kvs {present_kvs}") + print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): hs_norm = self.norm(hidden_states) From d3b1106cdd70092d69e3de3fd350db2a9082a902 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:53:19 -0800 Subject: [PATCH 205/589] working on caching --- exo/inference/pytorch/model/hf.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index c7f9aa613..80aba11c6 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,7 +1,7 @@ import torch import numpy as np from torch.nn import functional as F -from transformers import AutoModelForCausalLM +from transformers import AutoModelForCausalLM, LlamaConfig from exo.inference.shard import Shard from exo.helpers import DEBUG from typing import Tuple @@ -58,11 +58,16 @@ def __init__(self, shard: Shard): self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard + # Load the model with the configuration for caching + self.config = LlamaConfig.from_pretrained(shard.model_id) + self.config.use_cache = True # Enable caching + # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( shard.model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, - device_map="auto" + device_map="auto", + config=self.config ) # Extract only the layers for this shard From bb9e9b0be3352054ea97cc80807e7cc790d7498a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 20:58:01 -0800 Subject: [PATCH 206/589] working on caching --- exo/inference/pytorch/inference.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 
deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 3030972aa..c177b31c7 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -3,6 +3,7 @@ import numpy as np import torch import numpy as np +import json from typing import Optional, Callable, Tuple from transformers import AutoTokenizer from exo.inference.shard import Shard @@ -48,7 +49,7 @@ async def infer_prompt( if DEBUG >= 2: print(f"tokens: {tokens}\n") - output_data = self.model.forward_layers( + output_data, inference_state = self.model.forward_layers( tokens ) @@ -65,7 +66,7 @@ async def infer_prompt( return ( output_data, - "", + json.loads(inference_state), is_finished ) @@ -89,7 +90,7 @@ async def infer_tensor( # Run the forward pass through the model layers # output_data, past_key_values - output_data = self.model.forward_layers( + output_data, inference_state = self.model.forward_layers( in_tensor # past_key_values=past_key_values ) @@ -102,7 +103,7 @@ async def infer_tensor( return ( output_data, - "", + json.loads(inference_state), is_finished ) From 063b11d5fcc46b3701a0f85aff295fc16596c88c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 21:02:39 -0800 Subject: [PATCH 207/589] working on caching --- exo/inference/pytorch/inference.py | 61 ++++++++---------------------- 1 file changed, 15 insertions(+), 46 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index c177b31c7..6f2967859 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -39,18 +39,17 @@ async def infer_prompt( if DEBUG >= 2: print("infer_prompt called") - # Ensure the shard is loaded await self.ensure_shard(shard) - # Tokenize the prompt + inference_state = json.loads(inference_state) tokens = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) - # Run the forward pass through the model layers if DEBUG >= 2: print(f"tokens: {tokens}\n") output_data, inference_state = self.model.forward_layers( - tokens + tokens, + inference_state ) is_finished = output_data[-1] == self.tokenizer.eos_token_id and output_data.size == 1 @@ -59,6 +58,7 @@ async def infer_prompt( print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") print(f"output_data.item() {output_data.item()}") + print(f"inference_state: {inference_state}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") print(f"output_data[-1] {output_data[-1]}") @@ -66,7 +66,7 @@ async def infer_prompt( return ( output_data, - json.loads(inference_state), + json.dumps(inference_state), is_finished ) @@ -78,69 +78,38 @@ async def infer_tensor( inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: in_tensor = torch.tensor(input_data) + inference_state = json.loads(inference_state) if DEBUG >= 2: print("infer_tensor called") print(f"input_data: {input_data}\n") print(f"in_tensor: {in_tensor}\n") - # Ensure the shard is loaded await self.ensure_shard(shard) - # Run the forward pass through the model layers - # output_data, past_key_values - output_data, inference_state = self.model.forward_layers( - in_tensor - # past_key_values=past_key_values + in_tensor, + inference_state ) is_finished = output_data[-1] == self.tokenizer.eos_token_id and output_data.size == 1 if DEBUG >= 2: + print(f"output_data: {output_data}\n") + print(f"output_data.size {output_data.size}\n") + print(f"output_data.item() {output_data.item()}") + 
print(f"inference_state: {inference_state}") print(f"finished: {is_finished}") + print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") + print(f"output_data[-1] {output_data[-1]}") print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") return ( output_data, - json.loads(inference_state), + json.dumps(inference_state), is_finished ) - # def _load_kv_cache(self, past_key_values_list): - # """ - # Load key-value cache from the inference state. - - # Args: - # past_key_values_list (list): List of past key-value tensors. - - # Returns: - # list: List of loaded past key-value tensors. - # """ - # if past_key_values_list is None: - # return [] - # return [torch.tensor(kv, device=self.device) for kv in past_key_values_list] - - # def _save_kv_cache(self, past_key_values): - # """ - # Save key-value cache to the inference state. - - # Args: - # past_key_values (list): List of past key-value tensors. - - # Returns: - # list: List of key-value tensors in a format suitable for saving. - # """ - # if past_key_values is None: - # return [] - - # new_cache = [] - # for kv in past_key_values: - # if kv: - # new_cache.append(kv.cpu().tolist()) - - # return new_cache - async def ensure_shard(self, shard: Optional[Shard]): """ Ensure the model shard is loaded and ready for inference. From af8d9fe5dedacd33581ddf6a7b44d6cac1358e03 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 21:04:14 -0800 Subject: [PATCH 208/589] working on caching --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 6f2967859..433bfb7dd 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -41,7 +41,7 @@ async def infer_prompt( await self.ensure_shard(shard) - inference_state = json.loads(inference_state) + inference_state = json.loads(inference_state) if inference_state else "" tokens = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) if DEBUG >= 2: @@ -78,7 +78,7 @@ async def infer_tensor( inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: in_tensor = torch.tensor(input_data) - inference_state = json.loads(inference_state) + inference_state = json.loads(inference_state) if inference_state else "" if DEBUG >= 2: print("infer_tensor called") From f3e07eb08e7f2ffae6dadf92c14db610bedffea0 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 22:26:48 -0800 Subject: [PATCH 209/589] changing token to chat template --- exo/inference/pytorch/inference.py | 9 +++++- exo/inference/pytorch/model/hf.py | 44 ++-------------------------- exo/inference/pytorch/model/utils.py | 44 ++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 43 deletions(-) create mode 100644 exo/inference/pytorch/model/utils.py diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 433bfb7dd..51ef3b12b 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -42,7 +42,14 @@ async def infer_prompt( await self.ensure_shard(shard) inference_state = json.loads(inference_state) if inference_state else "" - tokens = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device) + tokens = self.tokenizer.apply_chat_template( + conversation=[{ + "role": "user", + "content": prompt + }], + tokenize=True, + add_generation_prompt=False, + ) if DEBUG >= 2: print(f"tokens: {tokens}\n") 
diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 80aba11c6..42f2357f1 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,52 +1,12 @@ import torch import numpy as np -from torch.nn import functional as F + from transformers import AutoModelForCausalLM, LlamaConfig from exo.inference.shard import Shard from exo.helpers import DEBUG from typing import Tuple -def sample_logits(logits, temp=0.85, top_k=25, top_p=0.9, alpha_f=0.1, alpha_p=0.0): - # Apply temperature scaling - if temp > 0: - logits = logits / temp - - # Top-k sampling - if top_k > 0: - top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) - logits = torch.full_like(logits, -float('inf')) - logits.scatter_(-1, top_k_indices, top_k_values) - - # Top-p (nucleus) sampling - if 0 < top_p < 1.0: - sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) - cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) - - # Remove tokens with cumulative probability above the threshold - sorted_indices_to_remove = cumulative_probs > top_p - sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() - sorted_indices_to_remove[..., 0] = 0 - - indices_to_remove = sorted_indices_to_remove.scatter(-1, sorted_indices, sorted_indices_to_remove) - logits = logits.masked_fill(indices_to_remove, -float('inf')) - - # Alpha sampling (to discourage repetition) - if alpha_f or alpha_p: - if not hasattr(sample_logits, "alpha_counter"): - setattr(sample_logits, "alpha_counter", torch.zeros_like(logits, dtype=torch.int32).contiguous()) - logits = logits - (sample_logits.alpha_counter * alpha_f + (sample_logits.alpha_counter > 0) * alpha_p) - - # Sample from the logits - probabilities = F.softmax(logits, dim=-1) - sampled_token = torch.multinomial(probabilities, 1) - - # Update alpha counter - if alpha_f or alpha_p: - condition = (torch.arange(probabilities.numel(), device=logits.device) == sampled_token) - condition = condition.bool() # Convert condition to boolean tensor - sample_logits.alpha_counter = torch.where(condition, sample_logits.alpha_counter + 1, sample_logits.alpha_counter) - - return sampled_token +from .utils import sample_logits class ShardedHuggingFaceModel(torch.nn.Module): def __init__(self, shard: Shard): diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py new file mode 100644 index 000000000..fe7ece45c --- /dev/null +++ b/exo/inference/pytorch/model/utils.py @@ -0,0 +1,44 @@ +import torch +from torch.nn import functional as F + +def sample_logits(logits, temp=0.85, top_k=25, top_p=0.9, alpha_f=0.1, alpha_p=0.0): + # Apply temperature scaling + if temp > 0: + logits = logits / temp + + # Top-k sampling + if top_k > 0: + top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) + logits = torch.full_like(logits, -float('inf')) + logits.scatter_(-1, top_k_indices, top_k_values) + + # Top-p (nucleus) sampling + if 0 < top_p < 1.0: + sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) + cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) + + # Remove tokens with cumulative probability above the threshold + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() + sorted_indices_to_remove[..., 0] = 0 + + indices_to_remove = sorted_indices_to_remove.scatter(-1, sorted_indices, sorted_indices_to_remove) + logits = 
logits.masked_fill(indices_to_remove, -float('inf')) + + # Alpha sampling (to discourage repetition) + if alpha_f or alpha_p: + if not hasattr(sample_logits, "alpha_counter"): + setattr(sample_logits, "alpha_counter", torch.zeros_like(logits, dtype=torch.int32).contiguous()) + logits = logits - (sample_logits.alpha_counter * alpha_f + (sample_logits.alpha_counter > 0) * alpha_p) + + # Sample from the logits + probabilities = F.softmax(logits, dim=-1) + sampled_token = torch.multinomial(probabilities, 1) + + # Update alpha counter + if alpha_f or alpha_p: + condition = (torch.arange(probabilities.numel(), device=logits.device) == sampled_token) + condition = condition.bool() # Convert condition to boolean tensor + sample_logits.alpha_counter = torch.where(condition, sample_logits.alpha_counter + 1, sample_logits.alpha_counter) + + return sampled_token \ No newline at end of file From bb65e46a5b98197080d9b84f4d8d8c1100980f84 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 22:28:46 -0800 Subject: [PATCH 210/589] changing token to chat template --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 51ef3b12b..213d52b06 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -42,14 +42,14 @@ async def infer_prompt( await self.ensure_shard(shard) inference_state = json.loads(inference_state) if inference_state else "" - tokens = self.tokenizer.apply_chat_template( + tokens = torch.tensor(self.tokenizer.apply_chat_template( conversation=[{ "role": "user", "content": prompt }], tokenize=True, add_generation_prompt=False, - ) + )) if DEBUG >= 2: print(f"tokens: {tokens}\n") From 8dea6ae1ffb3ba1ba41002ea48d33855b9f8743f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 22:32:05 -0800 Subject: [PATCH 211/589] changing token to chat template --- exo/inference/pytorch/model/hf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 42f2357f1..a9a216bed 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -70,6 +70,9 @@ def forward_layers( if self.shard.is_first_layer(): hidden_states = self.embed_tokens(hidden_states) + if DEBUG >= 2: + print(f"hidden_states: {hidden_states}") + batch_size, seq_len = input_data.size() position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) From a55f4560798d80fac049711e55e1688448cf9308 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 22:44:53 -0800 Subject: [PATCH 212/589] letting model general position_ids --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index a9a216bed..0e15e5f16 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -73,8 +73,8 @@ def forward_layers( if DEBUG >= 2: print(f"hidden_states: {hidden_states}") - batch_size, seq_len = input_data.size() - position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) + # batch_size, seq_len = input_data.size() + # position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) position_embeddings = self.full_model.model.rotary_emb( hidden_states, From 
de1d7331cfeb410609e0fda5f7a4720064145d30 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 22:54:19 -0800 Subject: [PATCH 213/589] working on kvs --- exo/inference/pytorch/model/hf.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 0e15e5f16..e961e2df0 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,7 +1,7 @@ import torch import numpy as np -from transformers import AutoModelForCausalLM, LlamaConfig +from transformers import AutoModelForCausalLM, LlamaConfig, DynamicCache, Cache from exo.inference.shard import Shard from exo.helpers import DEBUG from typing import Tuple @@ -50,7 +50,7 @@ def __init__(self, shard: Shard): def forward_layers( self, input_data: torch.tensor, - past_kvs = [] + past_kvs: Cache = DynamicCache() ) -> Tuple[np.ndarray, list]: """ Forward pass through the specified layers. @@ -65,7 +65,7 @@ def forward_layers( hidden_states = input_data position_ids = None position_embeddings = None - present_kvs = [] + present_kvs = DynamicCache() if self.shard.is_first_layer(): hidden_states = self.embed_tokens(hidden_states) @@ -76,14 +76,14 @@ def forward_layers( # batch_size, seq_len = input_data.size() # position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) - position_embeddings = self.full_model.model.rotary_emb( - hidden_states, - position_ids - ) + # position_embeddings = self.full_model.model.rotary_emb( + # hidden_states, + # position_ids + # ) - if DEBUG >= 2: - print(f"embedded hidden_states {hidden_states}") - print(f"position_ids: {position_embeddings}") + # if DEBUG >= 2: + # print(f"embedded hidden_states {hidden_states}") + # print(f"position_ids: {position_embeddings}") for i, layer in enumerate(self.layers): # Forward pass through the layer @@ -94,13 +94,13 @@ def forward_layers( layer_outputs = layer( hidden_states, - position_embeddings=position_embeddings, - past_key_values=past_kvs[i] if past_kvs else None, + # position_embeddings=position_embeddings, + past_key_values=past_kvs, use_cache=True ) if DEBUG >= 2: - print(f"\n[layer {i}] layer_outputs: {layer_outputs[0]}") + print(f"\n[layer {i}] layer_outputs: {layer_outputs}") hidden_states = layer_outputs[0] present_kvs = layer_outputs[1] From 0d44195b83af7db686eee72d3bbafff444659e05 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 23:25:37 -0800 Subject: [PATCH 214/589] working on kvs --- exo/inference/pytorch/model/hf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index e961e2df0..8bac47ace 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -72,6 +72,7 @@ def forward_layers( if DEBUG >= 2: print(f"hidden_states: {hidden_states}") + print(f"hidden_states.size(): {hidden_states.size()}") # batch_size, seq_len = input_data.size() # position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) From 683f547e554579f5c85753a42145142da7f63715 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 23:27:12 -0800 Subject: [PATCH 215/589] working on kvs --- exo/inference/pytorch/model/hf.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 8bac47ace..2e0ba375a 100644 --- 
a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -74,13 +74,13 @@ def forward_layers( print(f"hidden_states: {hidden_states}") print(f"hidden_states.size(): {hidden_states.size()}") - # batch_size, seq_len = input_data.size() - # position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) + batch_size, seq_len = input_data.size() + position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) - # position_embeddings = self.full_model.model.rotary_emb( - # hidden_states, - # position_ids - # ) + position_embeddings = self.full_model.model.rotary_emb( + hidden_states, + position_ids + ) # if DEBUG >= 2: # print(f"embedded hidden_states {hidden_states}") @@ -95,7 +95,7 @@ def forward_layers( layer_outputs = layer( hidden_states, - # position_embeddings=position_embeddings, + position_embeddings=position_embeddings, past_key_values=past_kvs, use_cache=True ) From 3e858621d2b72de92071914a6365687a1ef3f358 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 23:28:27 -0800 Subject: [PATCH 216/589] working on kvs --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 2e0ba375a..ed40e20f8 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -68,7 +68,7 @@ def forward_layers( present_kvs = DynamicCache() if self.shard.is_first_layer(): - hidden_states = self.embed_tokens(hidden_states) + if DEBUG >= 2: print(f"hidden_states: {hidden_states}") From 380feff6a720ec14789dd2ec6f50b841a29a3ae0 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 23:30:34 -0800 Subject: [PATCH 217/589] working on kvs --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index ed40e20f8..b133a57a7 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -68,13 +68,13 @@ def forward_layers( present_kvs = DynamicCache() if self.shard.is_first_layer(): - + hidden_states = self.embed_tokens(hidden_states) if DEBUG >= 2: print(f"hidden_states: {hidden_states}") print(f"hidden_states.size(): {hidden_states.size()}") - batch_size, seq_len = input_data.size() + batch_size, seq_len = hidden_states.size() position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) position_embeddings = self.full_model.model.rotary_emb( From 5fe49f0f6f6deacb7d4129fc1bfbcfa334822d06 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 23:34:34 -0800 Subject: [PATCH 218/589] working on kvs --- exo/inference/pytorch/inference.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 213d52b06..f1151df49 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -42,14 +42,15 @@ async def infer_prompt( await self.ensure_shard(shard) inference_state = json.loads(inference_state) if inference_state else "" - tokens = torch.tensor(self.tokenizer.apply_chat_template( + tokens = self.tokenizer.apply_chat_template( conversation=[{ "role": "user", "content": prompt }], tokenize=True, add_generation_prompt=False, - )) + return_tensors="pt" + ) if DEBUG >= 2: print(f"tokens: {tokens}\n") From 
a2a76d9f697ca2acf49578e46d2114504d900e37 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 9 Aug 2024 23:35:49 -0800 Subject: [PATCH 219/589] working on kvs --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index b133a57a7..2e0ba375a 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -74,7 +74,7 @@ def forward_layers( print(f"hidden_states: {hidden_states}") print(f"hidden_states.size(): {hidden_states.size()}") - batch_size, seq_len = hidden_states.size() + batch_size, seq_len = input_data.size() position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) position_embeddings = self.full_model.model.rotary_emb( From 48f2de6361b02633d84fc7429f125a8576a03812 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 09:33:13 -0800 Subject: [PATCH 220/589] fix main debug error --- main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index e66aa6897..0e5401c2d 100644 --- a/main.py +++ b/main.py @@ -51,7 +51,9 @@ chatgpt_api_endpoints=[f"http://{ip}:{args.chatgpt_api_port}/v1/chat/completions" for ip in get_all_ip_addresses()] web_chat_urls=[f"http://{ip}:{args.chatgpt_api_port}" for ip in get_all_ip_addresses()] if DEBUG >= 0: - print(f"Chat interface started:\n{'\n'.join([' - ' + terminal_link(web_chat_url) for web_chat_url in web_chat_urls])}") + links = '\n'.join([' - ' + terminal_link(web_chat_url) for web_chat_url in web_chat_urls]) + print(f"Chat interface started:\n{links}") + print(f"ChatGPT API endpoint served at:\n{'\n'.join([' - ' + terminal_link(chatgpt_api_endpoint) for chatgpt_api_endpoint in chatgpt_api_endpoints])}") node = StandardNode( args.node_id, From 68d9c46cc9aa31c99b3fc9af3153b558c2817718 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 09:35:01 -0800 Subject: [PATCH 221/589] fix main debug error --- main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 0e5401c2d..92ea2dacc 100644 --- a/main.py +++ b/main.py @@ -53,8 +53,8 @@ if DEBUG >= 0: links = '\n'.join([' - ' + terminal_link(web_chat_url) for web_chat_url in web_chat_urls]) print(f"Chat interface started:\n{links}") - - print(f"ChatGPT API endpoint served at:\n{'\n'.join([' - ' + terminal_link(chatgpt_api_endpoint) for chatgpt_api_endpoint in chatgpt_api_endpoints])}") + api_links = '\n'.join([' - ' + terminal_link(chatgpt_api_endpoint) for chatgpt_api_endpoint in chatgpt_api_endpoints]) + print(f"ChatGPT API endpoint served at:\n{api_links}") node = StandardNode( args.node_id, None, From 7306cfcf3a40f6f7af4b03c5529c416c8b294f48 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 09:58:01 -0800 Subject: [PATCH 222/589] finish error --- exo/inference/pytorch/inference.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index f1151df49..063ae9de7 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -48,10 +48,13 @@ async def infer_prompt( "content": prompt }], tokenize=True, - add_generation_prompt=False, + padding=True, + add_generation_prompt=True, return_tensors="pt" ) + # tokens = self.tokenizer.encode(prompt, return_tensors="pt") + if DEBUG >= 2: print(f"tokens: {tokens}\n") @@ -140,15 +143,4 @@ async def 
ensure_shard(self, shard: Optional[Shard]): self.shard = shard if DEBUG >= 2: - print(f"Shard loaded successfully: {shard}") - - def set_on_download_progress(self, on_download_progress: Callable[[int, int], None]): - """ - Set a callback function to track download progress. - - Args: - on_download_progress (Callable[[int, int], None]): Callback function to track progress. - """ - # must have this function or inference engine breaks - # This method can be implemented if progress tracking is needed - pass + print(f"Shard loaded successfully: {shard}") \ No newline at end of file From 09eb58a3bb06020d5e88d4070cab6187e98f1025 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 10:39:33 -0800 Subject: [PATCH 223/589] finish error --- exo/inference/pytorch/inference.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 063ae9de7..73b2df25a 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -42,18 +42,7 @@ async def infer_prompt( await self.ensure_shard(shard) inference_state = json.loads(inference_state) if inference_state else "" - tokens = self.tokenizer.apply_chat_template( - conversation=[{ - "role": "user", - "content": prompt - }], - tokenize=True, - padding=True, - add_generation_prompt=True, - return_tensors="pt" - ) - - # tokens = self.tokenizer.encode(prompt, return_tensors="pt") + tokens = self.tokenizer.encode(prompt, return_tensors="pt") if DEBUG >= 2: print(f"tokens: {tokens}\n") From 9c1873787047aeb5ecc26ad1895f3e0afaf336d6 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 10:47:28 -0800 Subject: [PATCH 224/589] finish error --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 73b2df25a..316f08d95 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -52,7 +52,7 @@ async def infer_prompt( inference_state ) - is_finished = output_data[-1] == self.tokenizer.eos_token_id and output_data.size == 1 + is_finished = output_data.size == 1 if DEBUG >= 2: print(f"output_data: {output_data}\n") From bf8ed2a3fc412fbe2af2fa85106876ebe6006a06 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 10:49:42 -0800 Subject: [PATCH 225/589] trying to manipulate sampling --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index fe7ece45c..9bda8b83c 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -1,7 +1,7 @@ import torch from torch.nn import functional as F -def sample_logits(logits, temp=0.85, top_k=25, top_p=0.9, alpha_f=0.1, alpha_p=0.0): +def sample_logits(logits, temp=0.0, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0.0): # Apply temperature scaling if temp > 0: logits = logits / temp From ae413d250d637e01ef472fb3647fa01a69df6ed7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 10:53:48 -0800 Subject: [PATCH 226/589] using resolve tokenizer from api --- exo/inference/pytorch/inference.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 316f08d95..4ee5d2bc8 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ 
-5,10 +5,10 @@ import numpy as np import json from typing import Optional, Callable, Tuple -from transformers import AutoTokenizer from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel +from exo.api.chatgpt_api import resolve_tokenizer from exo.helpers import DEBUG class PyTorchDynamicShardInferenceEngine(InferenceEngine): @@ -124,11 +124,7 @@ async def ensure_shard(self, shard: Optional[Shard]): print(f"Loading new shard: {shard}") self.model = ShardedHuggingFaceModel(shard) - self.tokenizer = AutoTokenizer.from_pretrained( - shard.model_id, - add_eos_token=True, - use_fast=True - ) + self.tokenizer = resolve_tokenizer(shard.model_id) self.shard = shard if DEBUG >= 2: From 5e8663d14112eb1702cb7132eb623683078f9f7e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 10:57:11 -0800 Subject: [PATCH 227/589] using resolve tokenizer from api --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 4ee5d2bc8..4e210d7cf 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -124,7 +124,7 @@ async def ensure_shard(self, shard: Optional[Shard]): print(f"Loading new shard: {shard}") self.model = ShardedHuggingFaceModel(shard) - self.tokenizer = resolve_tokenizer(shard.model_id) + self.tokenizer = await resolve_tokenizer(shard.model_id) self.shard = shard if DEBUG >= 2: From 9e3a4a7aab9cea6c764821d6d831d880a0103078 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 11:03:52 -0800 Subject: [PATCH 228/589] working on eot error --- exo/inference/pytorch/inference.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 4e210d7cf..cde37d5d8 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -52,7 +52,11 @@ async def infer_prompt( inference_state ) - is_finished = output_data.size == 1 + is_finished = output_data.size == 1 #and output_data.item() in [self.tokenizer.eos_token_id] + + if is_finished: + print(f"token from llm decode: {self.tokenizer.decode(output_data)}") + if DEBUG >= 2: print(f"output_data: {output_data}\n") @@ -92,7 +96,7 @@ async def infer_tensor( inference_state ) - is_finished = output_data[-1] == self.tokenizer.eos_token_id and output_data.size == 1 + is_finished = output_data.size == 1 #and output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 2: print(f"output_data: {output_data}\n") From 6f90f43286da07672f65adce4835a06fd5ab7f56 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 12:15:48 -0800 Subject: [PATCH 229/589] working on eot error --- exo/inference/pytorch/model/hf.py | 6 +++--- exo/inference/pytorch/model/utils.py | 14 ++++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 2e0ba375a..e363f82b4 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -82,9 +82,9 @@ def forward_layers( position_ids ) - # if DEBUG >= 2: - # print(f"embedded hidden_states {hidden_states}") - # print(f"position_ids: {position_embeddings}") + if DEBUG >= 2: + print(f"embedded hidden_states {hidden_states}") + print(f"position_ids: {position_embeddings}") for i, layer in enumerate(self.layers): # Forward 
pass through the layer diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 9bda8b83c..074d2bf76 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -8,8 +8,8 @@ def sample_logits(logits, temp=0.0, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0. # Top-k sampling if top_k > 0: - top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) - logits = torch.full_like(logits, -float('inf')) + top_k_values, top_k_indices = torch.topk(logits, min(top_k, logits.size(-1)), dim=-1) + logits = torch.full_like(logits, float('-inf')) logits.scatter_(-1, top_k_indices, top_k_values) # Top-p (nucleus) sampling @@ -22,13 +22,13 @@ def sample_logits(logits, temp=0.0, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0. sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() sorted_indices_to_remove[..., 0] = 0 - indices_to_remove = sorted_indices_to_remove.scatter(-1, sorted_indices, sorted_indices_to_remove) - logits = logits.masked_fill(indices_to_remove, -float('inf')) + indices_to_remove = sorted_indices[sorted_indices_to_remove] + logits[indices_to_remove] = float('-inf') # Alpha sampling (to discourage repetition) if alpha_f or alpha_p: if not hasattr(sample_logits, "alpha_counter"): - setattr(sample_logits, "alpha_counter", torch.zeros_like(logits, dtype=torch.int32).contiguous()) + sample_logits.alpha_counter = torch.zeros_like(logits, dtype=torch.int32) logits = logits - (sample_logits.alpha_counter * alpha_f + (sample_logits.alpha_counter > 0) * alpha_p) # Sample from the logits @@ -37,8 +37,6 @@ def sample_logits(logits, temp=0.0, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0. # Update alpha counter if alpha_f or alpha_p: - condition = (torch.arange(probabilities.numel(), device=logits.device) == sampled_token) - condition = condition.bool() # Convert condition to boolean tensor - sample_logits.alpha_counter = torch.where(condition, sample_logits.alpha_counter + 1, sample_logits.alpha_counter) + sample_logits.alpha_counter.scatter_(-1, sampled_token, sample_logits.alpha_counter.gather(-1, sampled_token) + 1) return sampled_token \ No newline at end of file From 07c146c5bd3d3f4782fa4aa0c8343dad588c56b8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 12:19:10 -0800 Subject: [PATCH 230/589] working on eot error --- exo/inference/pytorch/model/utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 074d2bf76..411dc449f 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -2,13 +2,18 @@ from torch.nn import functional as F def sample_logits(logits, temp=0.0, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0.0): + # Ensure logits is a 2D tensor + if logits.dim() == 1: + logits = logits.unsqueeze(0) + # Apply temperature scaling if temp > 0: logits = logits / temp # Top-k sampling if top_k > 0: - top_k_values, top_k_indices = torch.topk(logits, min(top_k, logits.size(-1)), dim=-1) + top_k = min(top_k, logits.size(-1)) + top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) logits = torch.full_like(logits, float('-inf')) logits.scatter_(-1, top_k_indices, top_k_values) @@ -23,7 +28,7 @@ def sample_logits(logits, temp=0.0, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0. 
sorted_indices_to_remove[..., 0] = 0 indices_to_remove = sorted_indices[sorted_indices_to_remove] - logits[indices_to_remove] = float('-inf') + logits[torch.arange(logits.size(0)).unsqueeze(1), indices_to_remove] = float('-inf') # Alpha sampling (to discourage repetition) if alpha_f or alpha_p: @@ -39,4 +44,4 @@ def sample_logits(logits, temp=0.0, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0. if alpha_f or alpha_p: sample_logits.alpha_counter.scatter_(-1, sampled_token, sample_logits.alpha_counter.gather(-1, sampled_token) + 1) - return sampled_token \ No newline at end of file + return sampled_token.squeeze() \ No newline at end of file From da2a97bc424728b543f99257dc829ed303d0cc4f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 12:29:30 -0800 Subject: [PATCH 231/589] working on eot error --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index cde37d5d8..7ab4e214c 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -52,7 +52,7 @@ async def infer_prompt( inference_state ) - is_finished = output_data.size == 1 #and output_data.item() in [self.tokenizer.eos_token_id] + is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] if is_finished: print(f"token from llm decode: {self.tokenizer.decode(output_data)}") @@ -96,7 +96,7 @@ async def infer_tensor( inference_state ) - is_finished = output_data.size == 1 #and output_data.item() in [self.tokenizer.eos_token_id] + is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 2: print(f"output_data: {output_data}\n") From f54990a121db9a52c8775cefad9dd1cb2c7aeab8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 12:33:19 -0800 Subject: [PATCH 232/589] working on eot error --- exo/inference/pytorch/model/hf.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index e363f82b4..0c8d308ba 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -74,7 +74,10 @@ def forward_layers( print(f"hidden_states: {hidden_states}") print(f"hidden_states.size(): {hidden_states.size()}") - batch_size, seq_len = input_data.size() + if hidden_states.size == 2: + batch_size, seq_len = hidden_states.size() + else: + batch_size, seq_len = input_data.size() position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) position_embeddings = self.full_model.model.rotary_emb( From e96579f20385fc64e0cb894f9ed87d93e9974695 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 12:46:22 -0800 Subject: [PATCH 233/589] working on eot error --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 0c8d308ba..7ca4448b2 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -74,7 +74,7 @@ def forward_layers( print(f"hidden_states: {hidden_states}") print(f"hidden_states.size(): {hidden_states.size()}") - if hidden_states.size == 2: + if hidden_states.size() == 2: batch_size, seq_len = hidden_states.size() else: batch_size, seq_len = input_data.size() From 25361ecba59b9d29bd9895d6e80ca6150bf7d337 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 
12:54:37 -0800 Subject: [PATCH 234/589] working on eot error --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 7ca4448b2..7127c8902 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -74,7 +74,7 @@ def forward_layers( print(f"hidden_states: {hidden_states}") print(f"hidden_states.size(): {hidden_states.size()}") - if hidden_states.size() == 2: + if len(hidden_states.size()) == 2: batch_size, seq_len = hidden_states.size() else: batch_size, seq_len = input_data.size() From 84a0323718907d0e16d137da2b8d54d24dc12b7f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 13:36:16 -0800 Subject: [PATCH 235/589] eot issue, update sampling --- exo/inference/pytorch/model/hf.py | 5 +- exo/inference/pytorch/model/utils.py | 78 +++++++++++++++++++++------- 2 files changed, 59 insertions(+), 24 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 7127c8902..e363f82b4 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -74,10 +74,7 @@ def forward_layers( print(f"hidden_states: {hidden_states}") print(f"hidden_states.size(): {hidden_states.size()}") - if len(hidden_states.size()) == 2: - batch_size, seq_len = hidden_states.size() - else: - batch_size, seq_len = input_data.size() + batch_size, seq_len = input_data.size() position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) position_embeddings = self.full_model.model.rotary_emb( diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 411dc449f..5e61d818e 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -1,36 +1,73 @@ import torch from torch.nn import functional as F -def sample_logits(logits, temp=0.0, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0.0): - # Ensure logits is a 2D tensor - if logits.dim() == 1: - logits = logits.unsqueeze(0) +def top_p_sampling(logits, top_p: float, temperature: float = 1.0): + """ + Perform top-p sampling (nucleus sampling) on logits. + + Args: + logits (torch.Tensor): The logits distribution to sample from. + top_p (float): The cumulative probability threshold for nucleus sampling. + temperature (float): Sampling temperature. + Returns: + torch.Tensor: The selected token indices. 
+ """ # Apply temperature scaling - if temp > 0: - logits = logits / temp + logits = logits / temperature + + # Sort the logits in descending order + sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) + + # Calculate cumulative probabilities + cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1) + + # Create a mask to remove logits with cumulative probability above the threshold + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() + sorted_indices_to_remove[..., 0] = 0 + + # Mask the logits + sorted_logits[sorted_indices_to_remove] = -float('Inf') + + # Sample from the filtered distribution + probabilities = torch.softmax(sorted_logits, dim=-1) + sampled_token = torch.multinomial(probabilities, 1) + + # Convert to original index order + return sorted_indices.gather(-1, sampled_token) + +def sample_logits(logits, temp=0.0, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0.0): + """ + Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. - # Top-k sampling + Args: + logits (torch.Tensor): The logits distribution to sample from. + temp (float): Temperature for scaling logits. + top_k (int): The number of top tokens to consider for sampling. + top_p (float): The cumulative probability threshold for nucleus sampling. + alpha_f (float): Penalty factor for repetition. + alpha_p (float): Penalty for selecting already selected tokens. + + Returns: + torch.Tensor: The selected token indices. + """ + # Return argmax for deterministic output at low temperature + if temp < 1e-6: + return logits.argmax(dim=-1) + + # Apply Top-k sampling if specified if top_k > 0: top_k = min(top_k, logits.size(-1)) top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) logits = torch.full_like(logits, float('-inf')) logits.scatter_(-1, top_k_indices, top_k_values) - # Top-p (nucleus) sampling + # Apply Top-p (nucleus) sampling if specified if 0 < top_p < 1.0: - sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) - cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) - - # Remove tokens with cumulative probability above the threshold - sorted_indices_to_remove = cumulative_probs > top_p - sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() - sorted_indices_to_remove[..., 0] = 0 + logits = top_p_sampling(logits, top_p, temp) - indices_to_remove = sorted_indices[sorted_indices_to_remove] - logits[torch.arange(logits.size(0)).unsqueeze(1), indices_to_remove] = float('-inf') - - # Alpha sampling (to discourage repetition) + # Apply alpha sampling to discourage repetition if alpha_f or alpha_p: if not hasattr(sample_logits, "alpha_counter"): sample_logits.alpha_counter = torch.zeros_like(logits, dtype=torch.int32) @@ -44,4 +81,5 @@ def sample_logits(logits, temp=0.0, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0. 
if alpha_f or alpha_p: sample_logits.alpha_counter.scatter_(-1, sampled_token, sample_logits.alpha_counter.gather(-1, sampled_token) + 1) - return sampled_token.squeeze() \ No newline at end of file + return sampled_token.squeeze() + From fffc0345ceecc96b315fb998ecdd3905946b939d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 13:40:18 -0800 Subject: [PATCH 236/589] ensure 2d --- exo/inference/pytorch/inference.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 7ab4e214c..1eb59c7bb 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -84,6 +84,10 @@ async def infer_tensor( in_tensor = torch.tensor(input_data) inference_state = json.loads(inference_state) if inference_state else "" + # Ensure input_data is 2D: [batch_size, seq_len] + if input_data.dim() == 1: + input_data = input_data.unsqueeze(0) # Add a batch dimension: [1, seq_len] + if DEBUG >= 2: print("infer_tensor called") print(f"input_data: {input_data}\n") From 3fbc7d238d3540405fcb0dcf88494e2530b3c6cb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 13:41:40 -0800 Subject: [PATCH 237/589] ensure 2d --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 1eb59c7bb..9a52bafd4 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -85,8 +85,8 @@ async def infer_tensor( inference_state = json.loads(inference_state) if inference_state else "" # Ensure input_data is 2D: [batch_size, seq_len] - if input_data.dim() == 1: - input_data = input_data.unsqueeze(0) # Add a batch dimension: [1, seq_len] + if in_tensor.dim() == 1: + in_tensor = in_tensor.unsqueeze(0) # Add a batch dimension: [1, seq_len] if DEBUG >= 2: print("infer_tensor called") From df2144e693d48af1218de7d97ed878d811a6c664 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 13:47:01 -0800 Subject: [PATCH 238/589] doing a fraction of temp for top p --- exo/inference/pytorch/model/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 5e61d818e..1a8ae7722 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -14,7 +14,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): torch.Tensor: The selected token indices. """ # Apply temperature scaling - logits = logits / temperature + logits = logits * (1/temperature) # Sort the logits in descending order sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.0, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0.0): +def sample_logits(logits, temp=0.8, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. 
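For reference, the top_p_sampling helper that patches 235-238 assemble in exo/inference/pytorch/model/utils.py amounts to the self-contained sketch below. It is a consolidated reading aid, not a further patch; the default values shown are illustrative only.

import torch

def top_p_sampling(logits: torch.Tensor, top_p: float = 0.9, temperature: float = 1.0) -> torch.Tensor:
    # Consolidated sketch for illustration; mirrors the nucleus-sampling helper built up in the patches above.
    logits = logits / temperature  # temperature scaling

    # Sort descending and accumulate probabilities.
    sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1)
    cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)

    # Mask everything past the top_p nucleus, always keeping the highest-probability token.
    remove = cumulative_probs > top_p
    remove[..., 1:] = remove[..., :-1].clone()
    remove[..., 0] = False
    sorted_logits[remove] = float("-inf")

    # Draw one token from the filtered distribution and map back to vocabulary indices.
    probs = torch.softmax(sorted_logits, dim=-1)
    picked = torch.multinomial(probs, 1)
    return sorted_indices.gather(-1, picked)

# e.g. top_p_sampling(torch.randn(1, 128256), top_p=0.9, temperature=0.8) returns one token id per row.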
From 9b37ee12fce752a541f62bc888785708e7cbfc3c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 13:53:08 -0800 Subject: [PATCH 239/589] doing a fraction of temp for top p --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 1a8ae7722..8379c0259 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -14,7 +14,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): torch.Tensor: The selected token indices. """ # Apply temperature scaling - logits = logits * (1/temperature) + logits = logits/temperature # Sort the logits in descending order sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) From 49eead3d2809e017486816f8adb85e61ea9e4204 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 13:55:03 -0800 Subject: [PATCH 240/589] trying other sampling --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 8379c0259..f0a403faf 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.8, top_k=15, top_p=0.9, alpha_f=0.1, alpha_p=0.0): +def sample_logits(logits, temp=0.8, top_k=0, top_p=1.0, alpha_f=0.1, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. From 1192971f117e5c3b44fedd2c1781026a96725a03 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 13:57:33 -0800 Subject: [PATCH 241/589] trying other sampling --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index f0a403faf..92b6d35ca 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.8, top_k=0, top_p=1.0, alpha_f=0.1, alpha_p=0.0): +def sample_logits(logits, temp=0.8, top_k=32, top_p=1.0, alpha_f=0.1, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. From 8bc583b6c52c20bbb140439ac3595c855eb2aa25 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:03:09 -0800 Subject: [PATCH 242/589] trying other sampling --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 92b6d35ca..59ffe6aa0 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.8, top_k=32, top_p=1.0, alpha_f=0.1, alpha_p=0.0): +def sample_logits(logits, temp=0.9, top_k=50, top_p=0.9, alpha_f=0.1, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. 
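Patches 239-242 above only move the sample_logits defaults around (temp, top_k, top_p, alpha_f). The helper below is hypothetical, not part of this patch series; it reproduces just the temperature, top-k, and top-p filtering stages so each knob can be tried on a toy logits tensor in isolation, with the alpha repetition penalty deliberately left out.

import torch

def filter_then_sample(logits: torch.Tensor, temp: float, top_k: int, top_p: float) -> torch.Tensor:
    # Hypothetical illustration; stages follow the structure of sample_logits above.
    if temp < 1e-6:
        # Near-zero temperature: greedy argmax, the deterministic path.
        return logits.argmax(dim=-1)

    scaled = logits / temp

    # Top-k: keep only the k largest logits per row.
    if top_k > 0:
        k = min(top_k, scaled.size(-1))
        kth_value = torch.topk(scaled, k, dim=-1).values[..., -1, None]
        scaled = scaled.masked_fill(scaled < kth_value, float("-inf"))

    # Top-p: drop the tail once cumulative probability exceeds top_p.
    if 0.0 < top_p < 1.0:
        sorted_logits, sorted_idx = torch.sort(scaled, descending=True, dim=-1)
        cum_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
        remove_sorted = cum_probs > top_p
        remove_sorted[..., 1:] = remove_sorted[..., :-1].clone()
        remove_sorted[..., 0] = False
        remove = torch.zeros_like(remove_sorted).scatter(-1, sorted_idx, remove_sorted)
        scaled = scaled.masked_fill(remove, float("-inf"))

    return torch.multinomial(torch.softmax(scaled, dim=-1), 1)

# e.g. filter_then_sample(torch.randn(1, 128256), temp=0.9, top_k=50, top_p=0.9) draws one token id;
# with temp near zero it collapses to the greedy path that these patches toggle against while tuning.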
From 9afe07fc2e019309ebff7981626f6bd0b6e91b15 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:07:58 -0800 Subject: [PATCH 243/589] trying other sampling --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 59ffe6aa0..b217ea490 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.9, top_k=50, top_p=0.9, alpha_f=0.1, alpha_p=0.0): +def sample_logits(logits, temp=1.0, top_k=50, top_p=0.95, alpha_f=0.5, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. From 954682b0a45501dfd6881b82a5a7a50b0f8e83a8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:20:16 -0800 Subject: [PATCH 244/589] trying other sampling --- exo/inference/pytorch/model/hf.py | 63 +++++++++++++++++-------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index e363f82b4..b40a3bcf7 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -22,23 +22,29 @@ def __init__(self, shard: Shard): self.config = LlamaConfig.from_pretrained(shard.model_id) self.config.use_cache = True # Enable caching + # Extract only the layers for this shard + # get layers up to end layer + self.config.config.num_hidden_layers = shard.end_layer + 1 + # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( shard.model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto", - config=self.config + config={ + "use_cache" + } ) - # Extract only the layers for this shard - print(f"\nself.model: {self.full_model.model}\n") - print(f"\nlayer amount: {len(self.full_model.model.layers)}") - self.layers = [] - for i in range(shard.start_layer, shard.end_layer + 1): - # if DEBUG >= 2: - # print(f"loading layer[{i}]: {self.full_model.model.layers[i]}") + + + # self.layered_model = self.full_model.model() + # self.layers = [] + # for i in range(shard.start_layer, shard.end_layer + 1): + # # if DEBUG >= 2: + # # print(f"loading layer[{i}]: {self.full_model.model.layers[i]}") - self.layers.append(self.full_model.model.layers[i]) + # self.layers.append(self.full_model.model.layers[i]) # self.layers = torch.nn.ModuleList(layer_list) @@ -86,28 +92,27 @@ def forward_layers( print(f"embedded hidden_states {hidden_states}") print(f"position_ids: {position_embeddings}") - for i, layer in enumerate(self.layers): - # Forward pass through the layer - if DEBUG >= 2: - print(f"\n[layer {i}] {layer}") - print(f"hidden_states {hidden_states}") - print(f"past_kvs {past_kvs}") - - layer_outputs = layer( - hidden_states, - position_embeddings=position_embeddings, - past_key_values=past_kvs, - use_cache=True - ) + # Forward pass through the layer + if DEBUG >= 2: + print(f"\n[layer model] {self.full_model.model}") + print(f"hidden_states {hidden_states}") + print(f"past_kvs {past_kvs}") + + layer_outputs = self.full_model.model( + hidden_states, + position_embeddings=position_embeddings, + past_key_values=past_kvs, + use_cache=True + ) - if DEBUG >= 2: - print(f"\n[layer {i}] layer_outputs: {layer_outputs}") - - hidden_states = layer_outputs[0] - 
present_kvs = layer_outputs[1] + if DEBUG >= 2: + print(f"\nlayer_outputs: {layer_outputs}") + + hidden_states = layer_outputs[0] + present_kvs = layer_outputs[1] - if DEBUG >= 2: - print(f"present_kvs {present_kvs}") + if DEBUG >= 2: + print(f"present_kvs {present_kvs}") print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): From 0547fa37a65db0336a5b338ca10528225ee37e04 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:29:09 -0800 Subject: [PATCH 245/589] fix typo --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index b40a3bcf7..436653e7a 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -24,7 +24,7 @@ def __init__(self, shard: Shard): # Extract only the layers for this shard # get layers up to end layer - self.config.config.num_hidden_layers = shard.end_layer + 1 + self.config.num_hidden_layers = shard.end_layer + 1 # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( From 88c94a86d6e64cbb9bf297aa08257d2966bcdc30 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:31:30 -0800 Subject: [PATCH 246/589] use input embeds --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 436653e7a..1136d1724 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -100,7 +100,7 @@ def forward_layers( layer_outputs = self.full_model.model( hidden_states, - position_embeddings=position_embeddings, + inputs_embeds=position_embeddings, past_key_values=past_kvs, use_cache=True ) From 4160c40f10df1eac78065534a1c03de284220d4b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:34:32 -0800 Subject: [PATCH 247/589] use position ids --- exo/inference/pytorch/model/hf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 1136d1724..802fc4385 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -100,7 +100,8 @@ def forward_layers( layer_outputs = self.full_model.model( hidden_states, - inputs_embeds=position_embeddings, + position_ids=position_ids + # inputs_embeds=position_embeddings, past_key_values=past_kvs, use_cache=True ) From a6e3c15d92cb0778c94e530d435e7a9d92b323f2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:34:58 -0800 Subject: [PATCH 248/589] use position ids --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 802fc4385..04946d49c 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -100,7 +100,7 @@ def forward_layers( layer_outputs = self.full_model.model( hidden_states, - position_ids=position_ids + position_ids=position_ids, # inputs_embeds=position_embeddings, past_key_values=past_kvs, use_cache=True From 6f3435e9c2dde1313a4645184a0cf7041ac34bbf Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:37:17 -0800 Subject: [PATCH 249/589] use position ids --- exo/inference/pytorch/model/hf.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py 
b/exo/inference/pytorch/model/hf.py index 04946d49c..5e90b2067 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -70,27 +70,27 @@ def forward_layers( hidden_states = input_data position_ids = None - position_embeddings = None + # position_embeddings = None present_kvs = DynamicCache() - if self.shard.is_first_layer(): - hidden_states = self.embed_tokens(hidden_states) + # if self.shard.is_first_layer(): + # hidden_states = self.embed_tokens(hidden_states) - if DEBUG >= 2: - print(f"hidden_states: {hidden_states}") - print(f"hidden_states.size(): {hidden_states.size()}") + # if DEBUG >= 2: + # print(f"hidden_states: {hidden_states}") + # print(f"hidden_states.size(): {hidden_states.size()}") - batch_size, seq_len = input_data.size() - position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) + # batch_size, seq_len = input_data.size() + # position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) - position_embeddings = self.full_model.model.rotary_emb( - hidden_states, - position_ids - ) + # position_embeddings = self.full_model.model.rotary_emb( + # hidden_states, + # position_ids + # ) - if DEBUG >= 2: - print(f"embedded hidden_states {hidden_states}") - print(f"position_ids: {position_embeddings}") + # if DEBUG >= 2: + # print(f"embedded hidden_states {hidden_states}") + # print(f"position_ids: {position_embeddings}") # Forward pass through the layer if DEBUG >= 2: From 157d0ddb0492a5bb28d487d54b17a8b820063726 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:37:32 -0800 Subject: [PATCH 250/589] rmv embed fix --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 5e90b2067..60a1b52f0 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -69,7 +69,7 @@ def forward_layers( print(f"1 shard {self.shard.to_dict()}") hidden_states = input_data - position_ids = None + # position_ids = None # position_embeddings = None present_kvs = DynamicCache() @@ -100,7 +100,7 @@ def forward_layers( layer_outputs = self.full_model.model( hidden_states, - position_ids=position_ids, + # position_ids=position_ids, # inputs_embeds=position_embeddings, past_key_values=past_kvs, use_cache=True From 0d12ef5175dc86195e9387f4b6f69e5b93e02010 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:42:09 -0800 Subject: [PATCH 251/589] layer test --- exo/inference/pytorch/model/hf.py | 42 +++---------------------------- 1 file changed, 3 insertions(+), 39 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 60a1b52f0..7f88db049 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -24,7 +24,7 @@ def __init__(self, shard: Shard): # Extract only the layers for this shard # get layers up to end layer - self.config.num_hidden_layers = shard.end_layer + 1 + self.config.num_hidden_layers = 2 # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( @@ -35,18 +35,6 @@ def __init__(self, shard: Shard): "use_cache" } ) - - - - # self.layered_model = self.full_model.model() - # self.layers = [] - # for i in range(shard.start_layer, shard.end_layer + 1): - # # if DEBUG >= 2: - # # print(f"loading layer[{i}]: {self.full_model.model.layers[i]}") - - # 
self.layers.append(self.full_model.model.layers[i]) - - # self.layers = torch.nn.ModuleList(layer_list) # Embeddings and final layer norm # used for doing what forward LlamaModel does in transformers @@ -66,37 +54,16 @@ def forward_layers( if DEBUG >= 2: print("forward_layer call") print(f"input_data: {input_data}") - print(f"1 shard {self.shard.to_dict()}") + print(f"shard {self.shard.to_dict()}") hidden_states = input_data - # position_ids = None - # position_embeddings = None present_kvs = DynamicCache() - # if self.shard.is_first_layer(): - # hidden_states = self.embed_tokens(hidden_states) - - # if DEBUG >= 2: - # print(f"hidden_states: {hidden_states}") - # print(f"hidden_states.size(): {hidden_states.size()}") - - # batch_size, seq_len = input_data.size() - # position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) - - # position_embeddings = self.full_model.model.rotary_emb( - # hidden_states, - # position_ids - # ) - - # if DEBUG >= 2: - # print(f"embedded hidden_states {hidden_states}") - # print(f"position_ids: {position_embeddings}") - # Forward pass through the layer if DEBUG >= 2: print(f"\n[layer model] {self.full_model.model}") print(f"hidden_states {hidden_states}") - print(f"past_kvs {past_kvs}") + # print(f"past_kvs {past_kvs}") layer_outputs = self.full_model.model( hidden_states, @@ -112,9 +79,6 @@ def forward_layers( hidden_states = layer_outputs[0] present_kvs = layer_outputs[1] - if DEBUG >= 2: - print(f"present_kvs {present_kvs}") - print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): hs_norm = self.norm(hidden_states) From 73fc9bf4f083626ce243d45793e33bb77888bb26 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:46:38 -0800 Subject: [PATCH 252/589] layer test --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 7f88db049..74b0cf4fe 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -76,8 +76,8 @@ def forward_layers( if DEBUG >= 2: print(f"\nlayer_outputs: {layer_outputs}") - hidden_states = layer_outputs[0] - present_kvs = layer_outputs[1] + hidden_states = layer_outputs.last_hidden_state + present_kvs = layer_outputs.past_key_values print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): From 2cdf81305d709566d05aa7308473d804e0cc7daf Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 14:51:51 -0800 Subject: [PATCH 253/589] layer test --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 74b0cf4fe..37148711b 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -73,7 +73,7 @@ def forward_layers( use_cache=True ) - if DEBUG >= 2: + if DEBUG >= 4: print(f"\nlayer_outputs: {layer_outputs}") hidden_states = layer_outputs.last_hidden_state From 0b3ec2cc7d33a8dab4197862b7f1b8b831922192 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 15:01:17 -0800 Subject: [PATCH 254/589] layer test --- exo/inference/pytorch/inference.py | 4 ++-- exo/inference/pytorch/model/hf.py | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 9a52bafd4..98fe9cc18 100644 --- 
a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -106,7 +106,7 @@ async def infer_tensor( print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") print(f"output_data.item() {output_data.item()}") - print(f"inference_state: {inference_state}") + print(f"inference_state: {inference_state.size()}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") print(f"output_data[-1] {output_data[-1]}") @@ -114,7 +114,7 @@ async def infer_tensor( return ( output_data, - json.dumps(inference_state), + json.dumps(inference_state.cpu().numpy()), is_finished ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 37148711b..d15c85bd5 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -27,13 +27,10 @@ def __init__(self, shard: Shard): self.config.num_hidden_layers = 2 # Load the model - self.full_model = AutoModelForCausalLM.from_pretrained( + self.full_model = AutoModelForCausalLM(self.config).from_pretrained( shard.model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, - device_map="auto", - config={ - "use_cache" - } + device_map="auto" ) # Embeddings and final layer norm From fb1e0a6bfb09b06adb2927e50d804e2ac1112884 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 15:04:36 -0800 Subject: [PATCH 255/589] layer test --- exo/inference/pytorch/model/hf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index d15c85bd5..60340edf7 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -27,12 +27,14 @@ def __init__(self, shard: Shard): self.config.num_hidden_layers = 2 # Load the model - self.full_model = AutoModelForCausalLM(self.config).from_pretrained( + self.full_model = AutoModelForCausalLM.from_pretrained( shard.model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" ) + self.full_model.config = self.config + # Embeddings and final layer norm # used for doing what forward LlamaModel does in transformers self.embed_tokens = self.full_model.model.embed_tokens From f662ad39a30064bbd1e8e10e59d2c59c6c4b2c41 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 15:08:10 -0800 Subject: [PATCH 256/589] layer test --- exo/inference/pytorch/inference.py | 8 ++++---- exo/inference/pytorch/model/utils.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 98fe9cc18..7540cbaa2 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -41,7 +41,7 @@ async def infer_prompt( await self.ensure_shard(shard) - inference_state = json.loads(inference_state) if inference_state else "" + inference_state = json.loads(torch.tensor(inference_state)) if inference_state else "" tokens = self.tokenizer.encode(prompt, return_tensors="pt") if DEBUG >= 2: @@ -62,7 +62,7 @@ async def infer_prompt( print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") print(f"output_data.item() {output_data.item()}") - print(f"inference_state: {inference_state}") + print(f"inference_state: {inference_state.size()}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") print(f"output_data[-1] {output_data[-1]}") @@ -70,7 +70,7 @@ async 
def infer_prompt( return ( output_data, - json.dumps(inference_state), + json.dumps(inference_state.cpu().numpy()), is_finished ) @@ -82,7 +82,7 @@ async def infer_tensor( inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: in_tensor = torch.tensor(input_data) - inference_state = json.loads(inference_state) if inference_state else "" + inference_state = json.loads(torch.tensor(inference_state)) if inference_state else "" # Ensure input_data is 2D: [batch_size, seq_len] if in_tensor.dim() == 1: diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index b217ea490..581a94b58 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=1.0, top_k=50, top_p=0.95, alpha_f=0.5, alpha_p=0.0): +def sample_logits(logits, temp=1.0, top_k=20, top_p=0.95, alpha_f=0.1, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. From 1b27532d5edda06aa24869e435a49bb33a60d822 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:00:36 -0800 Subject: [PATCH 257/589] layer test --- exo/inference/pytorch/inference.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 7540cbaa2..8b3a73f92 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -10,6 +10,7 @@ from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel from exo.api.chatgpt_api import resolve_tokenizer from exo.helpers import DEBUG +from transformers import DynamicCache class PyTorchDynamicShardInferenceEngine(InferenceEngine): """ @@ -41,7 +42,14 @@ async def infer_prompt( await self.ensure_shard(shard) - inference_state = json.loads(torch.tensor(inference_state)) if inference_state else "" + # need to make this so inference_state is not a string + if inference_state: + inference_state = DynamicCache.from_legacy_cache( + json.loads(torch.tensor(inference_state)) + ) + else: + inference_state = DynamicCache() + tokens = self.tokenizer.encode(prompt, return_tensors="pt") if DEBUG >= 2: From 0876c79be0823bf315da06ee60638f652313aab6 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:02:48 -0800 Subject: [PATCH 258/589] layer test --- exo/inference/pytorch/inference.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 8b3a73f92..9b8813534 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -70,7 +70,7 @@ async def infer_prompt( print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") print(f"output_data.item() {output_data.item()}") - print(f"inference_state: {inference_state.size()}") + print(f"inference_state.get_max_length(): {inference_state.get_max_length()}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") print(f"output_data[-1] {output_data[-1]}") @@ -90,7 +90,13 @@ async def infer_tensor( inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: in_tensor = torch.tensor(input_data) - inference_state = json.loads(torch.tensor(inference_state)) if inference_state else "" + + if inference_state: + 
inference_state = DynamicCache.from_legacy_cache( + json.loads(torch.tensor(inference_state)) + ) + else: + inference_state = DynamicCache() # Ensure input_data is 2D: [batch_size, seq_len] if in_tensor.dim() == 1: @@ -114,7 +120,7 @@ async def infer_tensor( print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") print(f"output_data.item() {output_data.item()}") - print(f"inference_state: {inference_state.size()}") + print(f"inference_state.get_max_length(): {inference_state.get_max_length()}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") print(f"output_data[-1] {output_data[-1]}") From e5c56c4a4ee354c8ce06e2424cf9f1844054450c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:12:40 -0800 Subject: [PATCH 259/589] layer test --- exo/inference/pytorch/inference.py | 34 ++++++++---------------------- exo/inference/pytorch/model/hf.py | 14 ++++++------ 2 files changed, 16 insertions(+), 32 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 9b8813534..a8c72f27c 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -43,21 +43,15 @@ async def infer_prompt( await self.ensure_shard(shard) # need to make this so inference_state is not a string - if inference_state: - inference_state = DynamicCache.from_legacy_cache( - json.loads(torch.tensor(inference_state)) - ) - else: - inference_state = DynamicCache() - + # cant use it with dynamic cache + tokens = self.tokenizer.encode(prompt, return_tensors="pt") if DEBUG >= 2: print(f"tokens: {tokens}\n") - output_data, inference_state = self.model.forward_layers( - tokens, - inference_state + output_data = self.model.forward_layers( + tokens ) is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] @@ -70,7 +64,6 @@ async def infer_prompt( print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") print(f"output_data.item() {output_data.item()}") - print(f"inference_state.get_max_length(): {inference_state.get_max_length()}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") print(f"output_data[-1] {output_data[-1]}") @@ -78,7 +71,7 @@ async def infer_prompt( return ( output_data, - json.dumps(inference_state.cpu().numpy()), + "", is_finished ) @@ -90,14 +83,7 @@ async def infer_tensor( inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: in_tensor = torch.tensor(input_data) - - if inference_state: - inference_state = DynamicCache.from_legacy_cache( - json.loads(torch.tensor(inference_state)) - ) - else: - inference_state = DynamicCache() - + # Ensure input_data is 2D: [batch_size, seq_len] if in_tensor.dim() == 1: in_tensor = in_tensor.unsqueeze(0) # Add a batch dimension: [1, seq_len] @@ -109,9 +95,8 @@ async def infer_tensor( await self.ensure_shard(shard) - output_data, inference_state = self.model.forward_layers( - in_tensor, - inference_state + output_data = self.model.forward_layers( + in_tensor ) is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] @@ -120,7 +105,6 @@ async def infer_tensor( print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") print(f"output_data.item() {output_data.item()}") - print(f"inference_state.get_max_length(): {inference_state.get_max_length()}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id 
{self.tokenizer.eos_token_id}") print(f"output_data[-1] {output_data[-1]}") @@ -128,7 +112,7 @@ async def infer_tensor( return ( output_data, - json.dumps(inference_state.cpu().numpy()), + "", is_finished ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 60340edf7..ca1c64165 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -40,10 +40,11 @@ def __init__(self, shard: Shard): self.embed_tokens = self.full_model.model.embed_tokens self.norm = self.full_model.model.norm + self.past_key_values = DynamicCache() + def forward_layers( self, - input_data: torch.tensor, - past_kvs: Cache = DynamicCache() + input_data: torch.tensor ) -> Tuple[np.ndarray, list]: """ Forward pass through the specified layers. @@ -56,7 +57,6 @@ def forward_layers( print(f"shard {self.shard.to_dict()}") hidden_states = input_data - present_kvs = DynamicCache() # Forward pass through the layer if DEBUG >= 2: @@ -68,7 +68,7 @@ def forward_layers( hidden_states, # position_ids=position_ids, # inputs_embeds=position_embeddings, - past_key_values=past_kvs, + past_key_values=self.past_key_values, use_cache=True ) @@ -76,7 +76,7 @@ def forward_layers( print(f"\nlayer_outputs: {layer_outputs}") hidden_states = layer_outputs.last_hidden_state - present_kvs = layer_outputs.past_key_values + self.past_key_values = layer_outputs.past_key_values print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): @@ -92,6 +92,6 @@ def forward_layers( print(f"hs_lm_head: {hs_lm_head}") print(f"output_token: {output_token}") - return (output_token, present_kvs) + return output_token - return (hidden_states.cpu().numpy(), present_kvs) + return hidden_states.cpu().numpy() From 696c3bb55dc95f15ad0e34b235e7bfe7cf574414 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:16:11 -0800 Subject: [PATCH 260/589] layer test --- exo/inference/pytorch/model/hf.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index ca1c64165..d0108c5bb 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -40,7 +40,7 @@ def __init__(self, shard: Shard): self.embed_tokens = self.full_model.model.embed_tokens self.norm = self.full_model.model.norm - self.past_key_values = DynamicCache() + # self.past_key_values = DynamicCache() def forward_layers( self, @@ -68,15 +68,15 @@ def forward_layers( hidden_states, # position_ids=position_ids, # inputs_embeds=position_embeddings, - past_key_values=self.past_key_values, - use_cache=True + # past_key_values=self.past_key_values, + # use_cache=True # not enough vram for using cache ;_; ) - if DEBUG >= 4: + if DEBUG >= 2: print(f"\nlayer_outputs: {layer_outputs}") hidden_states = layer_outputs.last_hidden_state - self.past_key_values = layer_outputs.past_key_values + # self.past_key_values = layer_outputs.past_key_values print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): From e807a6376907b10c4de88c179a8c29c945808623 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:16:34 -0800 Subject: [PATCH 261/589] layer test --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 581a94b58..2a80260a7 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ 
def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=1.0, top_k=20, top_p=0.95, alpha_f=0.1, alpha_p=0.0): +def sample_logits(logits, temp=0.6, top_k=20, top_p=0.95, alpha_f=0.1, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. From 0b589ee2c4c3a8b42a5f576cc6de0f774a97137c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:17:48 -0800 Subject: [PATCH 262/589] layer test --- exo/inference/pytorch/model/hf.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index d0108c5bb..cd81588ef 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -18,13 +18,7 @@ def __init__(self, shard: Shard): self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard - # Load the model with the configuration for caching - self.config = LlamaConfig.from_pretrained(shard.model_id) - self.config.use_cache = True # Enable caching - - # Extract only the layers for this shard - # get layers up to end layer - self.config.num_hidden_layers = 2 + # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( @@ -33,7 +27,15 @@ def __init__(self, shard: Shard): device_map="auto" ) - self.full_model.config = self.config + # set model config to restrict layers and enable caching + self.config = LlamaConfig.from_pretrained(shard.model_id) + # self.config.use_cache = True # Enable caching + + # Extract only the layers for this shard + # get layers up to end layer + self.config.num_hidden_layers = 2 + + self.full_model.model.config = self.config # Embeddings and final layer norm # used for doing what forward LlamaModel does in transformers From ced3879daf6eb6bd185ea72a46e9ee2ba441ca61 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:25:10 -0800 Subject: [PATCH 263/589] layer test --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 2a80260a7..1073b82f6 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.6, top_k=20, top_p=0.95, alpha_f=0.1, alpha_p=0.0): +def sample_logits(logits, temp=0.0, top_k=15, top_p=0.8, alpha_f=0.1, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. 
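The patches above iterate on loading only part of the model for a shard, both by shrinking config.num_hidden_layers and by collecting a subset of full_model.model.layers. A minimal sketch of that layer-slicing pattern for a Llama-style transformers checkpoint follows; the function name, model id, and shard bounds are illustrative, not taken from the patches.

import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM

def load_layer_shard(model_id: str, start_layer: int, end_layer: int):
    # Illustrative sketch; assumes a Llama-style model exposing model.model.layers.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
    )

    # Keep only the decoder layers belonging to this shard.
    kept = nn.ModuleList(model.model.layers[start_layer:end_layer + 1])
    model.model.layers = kept

    # Keep the config consistent with the truncated stack so cache sizing and any
    # depth-dependent logic see the shard's actual depth.
    model.config.num_hidden_layers = len(kept)
    return model

# e.g. load_layer_shard("some-org/some-llama-model", 0, 3) keeps decoder layers 0-3 (placeholder model id).
# As in the patches, the full checkpoint is still materialized before slicing, so the memory
# saving only appears once the dropped layers are released.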
From c6693a8673ea2aaa6449493c0503e6cd68c29912 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:25:18 -0800 Subject: [PATCH 264/589] layer test --- exo/inference/pytorch/model/hf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index cd81588ef..258daf5b6 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -34,8 +34,9 @@ def __init__(self, shard: Shard): # Extract only the layers for this shard # get layers up to end layer self.config.num_hidden_layers = 2 - self.full_model.model.config = self.config + if DEBUG >= 2: + print(f"full_model.model layer: {len(self.full_model.layers)}") # Embeddings and final layer norm # used for doing what forward LlamaModel does in transformers From 30e971dccf569a275a7f4f7472f9bec443fd7a61 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:26:10 -0800 Subject: [PATCH 265/589] layer test --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 258daf5b6..4a04e132c 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -36,7 +36,7 @@ def __init__(self, shard: Shard): self.config.num_hidden_layers = 2 self.full_model.model.config = self.config if DEBUG >= 2: - print(f"full_model.model layer: {len(self.full_model.layers)}") + print(f"full_model.model layer: {len(self.full_model.model.layers)}") # Embeddings and final layer norm # used for doing what forward LlamaModel does in transformers From 8b4e62492df516de2814446162b8f9b6ce5b2ffc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:33:21 -0800 Subject: [PATCH 266/589] fixing layer issue --- exo/inference/pytorch/model/hf.py | 20 ++++++++++++-------- exo/inference/pytorch/model/utils.py | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 4a04e132c..8a769c4ed 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -27,14 +27,18 @@ def __init__(self, shard: Shard): device_map="auto" ) - # set model config to restrict layers and enable caching - self.config = LlamaConfig.from_pretrained(shard.model_id) - # self.config.use_cache = True # Enable caching - - # Extract only the layers for this shard - # get layers up to end layer - self.config.num_hidden_layers = 2 - self.full_model.model.config = self.config + # using llamaconfig not working setting layers manually + layers = [] + for i in range(shard.start_layer, shard.end_layer + 1): + layer = self.full_model.model.layers[i] + + if DEBUG >= 2: + print(f"Loading layers[{i}]") + + layers.append(layer) + + self.full_model.model.layer = layers + if DEBUG >= 2: print(f"full_model.model layer: {len(self.full_model.model.layers)}") diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 1073b82f6..9f034cccd 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.0, top_k=15, top_p=0.8, alpha_f=0.1, alpha_p=0.0): +def sample_logits(logits, temp=0.1, top_k=15, top_p=0.8, alpha_f=0.2, alpha_p=0.0): """ Sample tokens from logits using 
temperature, top-k, top-p, and alpha sampling. From bcb499e86962492cbf8e842fc4671456359fa549 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:35:24 -0800 Subject: [PATCH 267/589] temp and layer test --- exo/inference/pytorch/model/hf.py | 2 +- exo/inference/pytorch/model/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 8a769c4ed..5bf3f19be 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -29,7 +29,7 @@ def __init__(self, shard: Shard): # using llamaconfig not working setting layers manually layers = [] - for i in range(shard.start_layer, shard.end_layer + 1): + for i in range(shard.start_layer, 2): layer = self.full_model.model.layers[i] if DEBUG >= 2: diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 9f034cccd..ecc5c6a7e 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.1, top_k=15, top_p=0.8, alpha_f=0.2, alpha_p=0.0): +def sample_logits(logits, temp=0.01, top_k=15, top_p=0.8, alpha_f=0.2, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. From 724c6c437d730c7c88cf71f648c1cc4a677dd06c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:40:42 -0800 Subject: [PATCH 268/589] temp and layer test --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 5bf3f19be..2c47ee612 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -37,7 +37,7 @@ def __init__(self, shard: Shard): layers.append(layer) - self.full_model.model.layer = layers + self.full_model.model.layers = layers if DEBUG >= 2: print(f"full_model.model layer: {len(self.full_model.model.layers)}") From e23f3f7df1aee472fc09819cb5b3055febd434a1 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:44:32 -0800 Subject: [PATCH 269/589] temp and layer test --- exo/inference/pytorch/model/hf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 2c47ee612..0a1153a35 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,4 +1,5 @@ import torch +import torch.nn as nn import numpy as np from transformers import AutoModelForCausalLM, LlamaConfig, DynamicCache, Cache @@ -37,7 +38,7 @@ def __init__(self, shard: Shard): layers.append(layer) - self.full_model.model.layers = layers + self.full_model.model.layers = nn.ModuleList(layers) if DEBUG >= 2: print(f"full_model.model layer: {len(self.full_model.model.layers)}") From 7f13a6d7c630f9a74a9ef854fb819c2ab60ffd08 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:49:00 -0800 Subject: [PATCH 270/589] temp and layer test --- exo/inference/pytorch/model/hf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 0a1153a35..f85ce70bc 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -30,7 +30,7 @@ def __init__(self, shard: Shard): # using 
llamaconfig not working setting layers manually layers = [] - for i in range(shard.start_layer, 2): + for i in range(5, 10): layer = self.full_model.model.layers[i] if DEBUG >= 2: @@ -69,7 +69,7 @@ def forward_layers( # Forward pass through the layer if DEBUG >= 2: print(f"\n[layer model] {self.full_model.model}") - print(f"hidden_states {hidden_states}") + print(f"IN hidden_states {hidden_states}") # print(f"past_kvs {past_kvs}") layer_outputs = self.full_model.model( @@ -81,7 +81,8 @@ def forward_layers( ) if DEBUG >= 2: - print(f"\nlayer_outputs: {layer_outputs}") + print(f"OUT hidden_states {hidden_states}") + # print(f"\nlayer_outputs: {layer_outputs}") hidden_states = layer_outputs.last_hidden_state # self.past_key_values = layer_outputs.past_key_values From ec92328d5e8c82bff75e2f1f0f4cda031fea5e4d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:52:46 -0800 Subject: [PATCH 271/589] temp and layer test --- exo/inference/pytorch/model/hf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index f85ce70bc..9343c70ca 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -74,6 +74,7 @@ def forward_layers( layer_outputs = self.full_model.model( hidden_states, + past_key_values=None # position_ids=position_ids, # inputs_embeds=position_embeddings, # past_key_values=self.past_key_values, From fc3d2248be6401890b45a706c46e8ce1cb20c743 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 16:58:49 -0800 Subject: [PATCH 272/589] temp and layer test --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 9343c70ca..28ecd3c02 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -74,7 +74,7 @@ def forward_layers( layer_outputs = self.full_model.model( hidden_states, - past_key_values=None + layer_idx=5 # position_ids=position_ids, # inputs_embeds=position_embeddings, # past_key_values=self.past_key_values, From f14a3397b3d2ba8a37d062808706aa6f1193c688 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:00:02 -0800 Subject: [PATCH 273/589] temp and layer test --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 28ecd3c02..755a2fe84 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -72,9 +72,9 @@ def forward_layers( print(f"IN hidden_states {hidden_states}") # print(f"past_kvs {past_kvs}") + self.full_model.model.layer_idx = 5 layer_outputs = self.full_model.model( - hidden_states, - layer_idx=5 + hidden_states # position_ids=position_ids, # inputs_embeds=position_embeddings, # past_key_values=self.past_key_values, From 4f4a9e11f80846cadfdcaf6666241329ec36162c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:07:39 -0800 Subject: [PATCH 274/589] temp and layer test --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 755a2fe84..f8964a70a 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -74,11 +74,11 @@ def forward_layers( self.full_model.model.layer_idx = 5 layer_outputs = self.full_model.model( - hidden_states + hidden_states, # 
position_ids=position_ids, # inputs_embeds=position_embeddings, # past_key_values=self.past_key_values, - # use_cache=True # not enough vram for using cache ;_; + use_cache=False # not enough vram for using cache ;_; ) if DEBUG >= 2: From 3da44b3c70785131102c451eaf00330d0089e4a8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:09:28 -0800 Subject: [PATCH 275/589] change temp --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index ecc5c6a7e..d25de200e 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.01, top_k=15, top_p=0.8, alpha_f=0.2, alpha_p=0.0): +def sample_logits(logits, temp=0.001, top_k=15, top_p=0.8, alpha_f=0.2, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. From 0a4a0038f36b1abe52c41282888f1e812d3f4435 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:10:54 -0800 Subject: [PATCH 276/589] change temp and alpha --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index d25de200e..e47c50df5 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.001, top_k=15, top_p=0.8, alpha_f=0.2, alpha_p=0.0): +def sample_logits(logits, temp=0.001, top_k=15, top_p=0.8, alpha_f=0.0, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. From e351501f33fd0a4ff474068da77ced77296dce79 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:15:12 -0800 Subject: [PATCH 277/589] change temp and alpha --- exo/inference/pytorch/model/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index e47c50df5..510b951ff 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.001, top_k=15, top_p=0.8, alpha_f=0.0, alpha_p=0.0): +def sample_logits(logits, temp=0.001, top_k=15, top_p=0.8, alpha_f=0.1, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. 
@@ -68,7 +68,7 @@ def sample_logits(logits, temp=0.001, top_k=15, top_p=0.8, alpha_f=0.0, alpha_p= logits = top_p_sampling(logits, top_p, temp) # Apply alpha sampling to discourage repetition - if alpha_f or alpha_p: + if alpha_f > 0.0 or alpha_p > 0.0: if not hasattr(sample_logits, "alpha_counter"): sample_logits.alpha_counter = torch.zeros_like(logits, dtype=torch.int32) logits = logits - (sample_logits.alpha_counter * alpha_f + (sample_logits.alpha_counter > 0) * alpha_p) @@ -78,7 +78,7 @@ def sample_logits(logits, temp=0.001, top_k=15, top_p=0.8, alpha_f=0.0, alpha_p= sampled_token = torch.multinomial(probabilities, 1) # Update alpha counter - if alpha_f or alpha_p: + if alpha_f > 0.0 or alpha_p > 0.0: sample_logits.alpha_counter.scatter_(-1, sampled_token, sample_logits.alpha_counter.gather(-1, sampled_token) + 1) return sampled_token.squeeze() From 325156741e55848681655b0f19ff856d41353d5f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:16:25 -0800 Subject: [PATCH 278/589] change temp --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 510b951ff..b043a45b3 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.001, top_k=15, top_p=0.8, alpha_f=0.1, alpha_p=0.0): +def sample_logits(logits, temp=0.0, top_k=15, top_p=0.8, alpha_f=0.1, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. From 16e4f7ec5f33ddc7f27802e509c3fe4cce87ce1e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:17:35 -0800 Subject: [PATCH 279/589] change temp --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index b043a45b3..b8426e51e 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -37,7 +37,7 @@ def top_p_sampling(logits, top_p: float, temperature: float = 1.0): # Convert to original index order return sorted_indices.gather(-1, sampled_token) -def sample_logits(logits, temp=0.0, top_k=15, top_p=0.8, alpha_f=0.1, alpha_p=0.0): +def sample_logits(logits, temp=0.3, top_k=85, top_p=2.0, alpha_f=0.1, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. 
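Worth noting about the defaults landed in the patch above (temp=0.3, top_k=85, top_p=2.0): a cumulative probability can never exceed 1.0, so top_p=2.0 falls outside the `0 < top_p < 1.0` guard around `top_p_sampling` and nucleus filtering is effectively switched off at this point, leaving temperature scaling, top-k, and the alpha penalty as the active controls. For reference, a compact standalone version of the same temperature / top-k / top-p pipeline is sketched below; it is a simplification for illustration, not the repository's exact function, and `sample_token` plus the toy inputs are made-up names.

import torch
from torch.nn import functional as F

def sample_token(logits: torch.Tensor, temp: float = 0.3, top_k: int = 85, top_p: float = 0.9) -> torch.Tensor:
    # Greedy decoding when temperature is effectively zero.
    if temp < 1e-6:
        return logits.argmax(dim=-1)

    logits = logits / temp

    # Top-k: mask everything below the k-th largest logit.
    if top_k > 0:
        k = min(top_k, logits.size(-1))
        kth_value = torch.topk(logits, k, dim=-1).values[..., -1, None]
        logits = torch.where(logits < kth_value, torch.full_like(logits, float("-inf")), logits)

    # Top-p (nucleus): values such as top_p=2.0 skip this branch entirely,
    # so only temperature scaling and top-k take effect.
    if 0 < top_p < 1.0:
        sorted_logits, sorted_idx = torch.sort(logits, descending=True, dim=-1)
        cumulative = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        to_remove = cumulative > top_p
        to_remove[..., 1:] = to_remove[..., :-1].clone()  # shift right so the top token is always kept
        to_remove[..., 0] = False
        sorted_logits[to_remove] = float("-inf")
        logits = torch.full_like(logits, float("-inf")).scatter(-1, sorted_idx, sorted_logits)

    probs = F.softmax(logits, dim=-1)
    return torch.multinomial(probs, num_samples=1).squeeze(-1)

if __name__ == "__main__":
    torch.manual_seed(0)
    fake_logits = torch.randn(1, 10)  # toy batch of 1 over a 10-token vocabulary
    print(sample_token(fake_logits, temp=0.3, top_k=5, top_p=0.9))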
From 608392765854c5986b772b24a47286be384f5ac2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:19:55 -0800 Subject: [PATCH 280/589] change temp --- exo/inference/pytorch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index f8964a70a..bbfa0d906 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -30,7 +30,7 @@ def __init__(self, shard: Shard): # using llamaconfig not working setting layers manually layers = [] - for i in range(5, 10): + for i in range(shard.start_layer, shard.end_layer + 1): layer = self.full_model.model.layers[i] if DEBUG >= 2: From 5b02fd1d2f621dce1bd59e95ef7fb4151d80cae4 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:30:31 -0800 Subject: [PATCH 281/589] change sampling --- exo/inference/pytorch/model/utils.py | 142 ++++++++++++++++++--------- 1 file changed, 94 insertions(+), 48 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index b8426e51e..b2ea5847f 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -1,60 +1,108 @@ import torch from torch.nn import functional as F -def top_p_sampling(logits, top_p: float, temperature: float = 1.0): - """ - Perform top-p sampling (nucleus sampling) on logits. +# def top_p_sampling(logits, top_p: float, temperature: float = 1.0): +# """ +# Perform top-p sampling (nucleus sampling) on logits. - Args: - logits (torch.Tensor): The logits distribution to sample from. - top_p (float): The cumulative probability threshold for nucleus sampling. - temperature (float): Sampling temperature. +# Args: +# logits (torch.Tensor): The logits distribution to sample from. +# top_p (float): The cumulative probability threshold for nucleus sampling. +# temperature (float): Sampling temperature. - Returns: - torch.Tensor: The selected token indices. - """ - # Apply temperature scaling - logits = logits/temperature +# Returns: +# torch.Tensor: The selected token indices. 
+# """ +# # Apply temperature scaling +# logits = logits/temperature - # Sort the logits in descending order - sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) +# # Sort the logits in descending order +# sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) - # Calculate cumulative probabilities - cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1) +# # Calculate cumulative probabilities +# cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1) - # Create a mask to remove logits with cumulative probability above the threshold - sorted_indices_to_remove = cumulative_probs > top_p - sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() - sorted_indices_to_remove[..., 0] = 0 +# # Create a mask to remove logits with cumulative probability above the threshold +# sorted_indices_to_remove = cumulative_probs > top_p +# sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() +# sorted_indices_to_remove[..., 0] = 0 - # Mask the logits - sorted_logits[sorted_indices_to_remove] = -float('Inf') +# # Mask the logits +# sorted_logits[sorted_indices_to_remove] = -float('Inf') - # Sample from the filtered distribution - probabilities = torch.softmax(sorted_logits, dim=-1) - sampled_token = torch.multinomial(probabilities, 1) +# # Sample from the filtered distribution +# probabilities = torch.softmax(sorted_logits, dim=-1) +# sampled_token = torch.multinomial(probabilities, 1) - # Convert to original index order - return sorted_indices.gather(-1, sampled_token) +# # Convert to original index order +# return sorted_indices.gather(-1, sampled_token) + +# def sample_logits(logits, temp=0.3, top_k=85, top_p=2.0, alpha_f=0.1, alpha_p=0.0): +# """ +# Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. + +# Args: +# logits (torch.Tensor): The logits distribution to sample from. +# temp (float): Temperature for scaling logits. +# top_k (int): The number of top tokens to consider for sampling. +# top_p (float): The cumulative probability threshold for nucleus sampling. +# alpha_f (float): Penalty factor for repetition. +# alpha_p (float): Penalty for selecting already selected tokens. + +# Returns: +# torch.Tensor: The selected token indices. 
+# """ +# # Return argmax for deterministic output at low temperature +# if temp < 1e-6: +# return logits.argmax(dim=-1) + +# # Apply Top-k sampling if specified +# if top_k > 0: +# top_k = min(top_k, logits.size(-1)) +# top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) +# logits = torch.full_like(logits, float('-inf')) +# logits.scatter_(-1, top_k_indices, top_k_values) -def sample_logits(logits, temp=0.3, top_k=85, top_p=2.0, alpha_f=0.1, alpha_p=0.0): +# # Apply Top-p (nucleus) sampling if specified +# if 0 < top_p < 1.0: +# logits = top_p_sampling(logits, top_p, temp) + +# # Apply alpha sampling to discourage repetition +# if alpha_f > 0.0 or alpha_p > 0.0: +# if not hasattr(sample_logits, "alpha_counter"): +# sample_logits.alpha_counter = torch.zeros_like(logits, dtype=torch.int32) +# logits = logits - (sample_logits.alpha_counter * alpha_f + (sample_logits.alpha_counter > 0) * alpha_p) + +# # Sample from the logits +# probabilities = F.softmax(logits, dim=-1) +# sampled_token = torch.multinomial(probabilities, 1) + +# # Update alpha counter +# if alpha_f > 0.0 or alpha_p > 0.0: +# sample_logits.alpha_counter.scatter_(-1, sampled_token, sample_logits.alpha_counter.gather(-1, sampled_token) + 1) + +# return sampled_token.squeeze() + +def sample_logits(logits, temperature=1.0, top_k=50, top_p=0.95): """ - Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. + Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. Args: logits (torch.Tensor): The logits distribution to sample from. - temp (float): Temperature for scaling logits. + temperature (float): Temperature for scaling logits. top_k (int): The number of top tokens to consider for sampling. top_p (float): The cumulative probability threshold for nucleus sampling. - alpha_f (float): Penalty factor for repetition. - alpha_p (float): Penalty for selecting already selected tokens. Returns: - torch.Tensor: The selected token indices. + torch.Tensor: The selected token index. """ - # Return argmax for deterministic output at low temperature - if temp < 1e-6: - return logits.argmax(dim=-1) + + # Ensure logits are in a floating-point format + logits = logits.float() + + # Apply temperature scaling + if temperature != 1.0: + logits = logits / temperature # Apply Top-k sampling if specified if top_k > 0: @@ -65,21 +113,19 @@ def sample_logits(logits, temp=0.3, top_k=85, top_p=2.0, alpha_f=0.1, alpha_p=0. 
# Apply Top-p (nucleus) sampling if specified if 0 < top_p < 1.0: - logits = top_p_sampling(logits, top_p, temp) + sorted_logits, _ = torch.sort(logits, descending=True, dim=-1) + cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) + + # Remove tokens with cumulative probability above the threshold + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() + sorted_indices_to_remove[..., 0] = 0 - # Apply alpha sampling to discourage repetition - if alpha_f > 0.0 or alpha_p > 0.0: - if not hasattr(sample_logits, "alpha_counter"): - sample_logits.alpha_counter = torch.zeros_like(logits, dtype=torch.int32) - logits = logits - (sample_logits.alpha_counter * alpha_f + (sample_logits.alpha_counter > 0) * alpha_p) + sorted_logits[sorted_indices_to_remove] = -float('Inf') + logits = sorted_logits # Sample from the logits probabilities = F.softmax(logits, dim=-1) sampled_token = torch.multinomial(probabilities, 1) - # Update alpha counter - if alpha_f > 0.0 or alpha_p > 0.0: - sample_logits.alpha_counter.scatter_(-1, sampled_token, sample_logits.alpha_counter.gather(-1, sampled_token) + 1) - - return sampled_token.squeeze() - + return sampled_token.squeeze() \ No newline at end of file From 9805ac2ca229c49ac1b35b6c7ca2bd5d6a2a4e88 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:33:21 -0800 Subject: [PATCH 282/589] change sampling --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index b2ea5847f..64f30bd23 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -83,7 +83,7 @@ # return sampled_token.squeeze() -def sample_logits(logits, temperature=1.0, top_k=50, top_p=0.95): +def sample_logits(logits, temperature=1.0, top_k=15, top_p=0.95): """ Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. From 8da3114b36911bc460fd62d00589918052fc2278 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:35:02 -0800 Subject: [PATCH 283/589] change sampling --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 64f30bd23..163e61fbf 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -83,7 +83,7 @@ # return sampled_token.squeeze() -def sample_logits(logits, temperature=1.0, top_k=15, top_p=0.95): +def sample_logits(logits, temperature=0.6, top_k=15, top_p=0.55): """ Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. From c62dd2d6ce589e516b0defe76cb08a4110b19e37 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:37:50 -0800 Subject: [PATCH 284/589] change sampling --- exo/inference/pytorch/model/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 163e61fbf..54fd6245e 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -83,7 +83,7 @@ # return sampled_token.squeeze() -def sample_logits(logits, temperature=0.6, top_k=15, top_p=0.55): +def sample_logits(logits, temperature=0.0, top_k=0, top_p=1.0): """ Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. 
@@ -102,7 +102,7 @@ def sample_logits(logits, temperature=0.6, top_k=15, top_p=0.55): # Apply temperature scaling if temperature != 1.0: - logits = logits / temperature + logits = logits * (1 / temperature) # Apply Top-k sampling if specified if top_k > 0: From 0f7f96dc848aa81edbef59641d0679ecfb7adb72 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:41:01 -0800 Subject: [PATCH 285/589] change sampling --- exo/inference/pytorch/model/utils.py | 46 +++++++++++++++------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 54fd6245e..4cb704d28 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -101,28 +101,30 @@ def sample_logits(logits, temperature=0.0, top_k=0, top_p=1.0): logits = logits.float() # Apply temperature scaling - if temperature != 1.0: - logits = logits * (1 / temperature) - - # Apply Top-k sampling if specified - if top_k > 0: - top_k = min(top_k, logits.size(-1)) - top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) - logits = torch.full_like(logits, float('-inf')) - logits.scatter_(-1, top_k_indices, top_k_values) - - # Apply Top-p (nucleus) sampling if specified - if 0 < top_p < 1.0: - sorted_logits, _ = torch.sort(logits, descending=True, dim=-1) - cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) - - # Remove tokens with cumulative probability above the threshold - sorted_indices_to_remove = cumulative_probs > top_p - sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() - sorted_indices_to_remove[..., 0] = 0 - - sorted_logits[sorted_indices_to_remove] = -float('Inf') - logits = sorted_logits + if temperature == 0: + logits = logits.argmax(dim=-1) + else: + # Apply Top-k sampling if specified + if top_k > 0: + top_k = min(top_k, logits.size(-1)) + top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) + logits = torch.full_like(logits, float('-inf')) + logits.scatter_(-1, top_k_indices, top_k_values) + + # Apply Top-p (nucleus) sampling if specified + if 0 < top_p < 1.0: + sorted_logits, _ = torch.sort(logits, descending=True, dim=-1) + cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) + + # Remove tokens with cumulative probability above the threshold + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() + sorted_indices_to_remove[..., 0] = 0 + + sorted_logits[sorted_indices_to_remove] = -float('Inf') + logits = sorted_logits + else: + logits = logits * (1/temperature) # Sample from the logits probabilities = F.softmax(logits, dim=-1) From fc3661939359cd9641016c72bdb35c6b218585a9 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:43:22 -0800 Subject: [PATCH 286/589] change sampling --- exo/inference/pytorch/model/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 4cb704d28..ab3055239 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -97,8 +97,8 @@ def sample_logits(logits, temperature=0.0, top_k=0, top_p=1.0): torch.Tensor: The selected token index. 
""" - # Ensure logits are in a floating-point format - logits = logits.float() + # Ensure logits are long + logits = logits.long() # Apply temperature scaling if temperature == 0: From b5f98d58dc4ea09b71a29c84501ea845d540651f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:46:27 -0800 Subject: [PATCH 287/589] remove softmax --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index ab3055239..89a4522ba 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -127,7 +127,7 @@ def sample_logits(logits, temperature=0.0, top_k=0, top_p=1.0): logits = logits * (1/temperature) # Sample from the logits - probabilities = F.softmax(logits, dim=-1) + # probabilities = F.softmax(logits, dim=-1) sampled_token = torch.multinomial(probabilities, 1) return sampled_token.squeeze() \ No newline at end of file From 52d608f372c899446690797d141597b67a98c4e5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:47:31 -0800 Subject: [PATCH 288/589] remove softmax --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 89a4522ba..b681a40ba 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -128,6 +128,6 @@ def sample_logits(logits, temperature=0.0, top_k=0, top_p=1.0): # Sample from the logits # probabilities = F.softmax(logits, dim=-1) - sampled_token = torch.multinomial(probabilities, 1) + sampled_token = torch.multinomial(logits, 1) return sampled_token.squeeze() \ No newline at end of file From b17a9ab39d55f33eadb631c0022cd748f15db98a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:48:54 -0800 Subject: [PATCH 289/589] float long issue --- exo/inference/pytorch/model/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index b681a40ba..7b1a07350 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -97,8 +97,8 @@ def sample_logits(logits, temperature=0.0, top_k=0, top_p=1.0): torch.Tensor: The selected token index. 
""" - # Ensure logits are long - logits = logits.long() + # Ensure logits are float + logits = logits.float() # Apply temperature scaling if temperature == 0: From 69552e050897c152e6d3471713d21624bcf30449 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:50:13 -0800 Subject: [PATCH 290/589] float long issue --- exo/inference/pytorch/model/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 7b1a07350..db2d890e4 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -101,8 +101,8 @@ def sample_logits(logits, temperature=0.0, top_k=0, top_p=1.0): logits = logits.float() # Apply temperature scaling - if temperature == 0: - logits = logits.argmax(dim=-1) + if temperature == 0.0: + logits = logits.argmax(dim=-1).float() else: # Apply Top-k sampling if specified if top_k > 0: From 1ee8a10371b404af348396ff00d616a92f2817e3 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 17:52:37 -0800 Subject: [PATCH 291/589] float long issue --- exo/inference/pytorch/model/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index db2d890e4..07b228c61 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -127,7 +127,7 @@ def sample_logits(logits, temperature=0.0, top_k=0, top_p=1.0): logits = logits * (1/temperature) # Sample from the logits - # probabilities = F.softmax(logits, dim=-1) - sampled_token = torch.multinomial(logits, 1) + probabilities = F.softmax(logits, dim=-1) + sampled_token = torch.multinomial(probabilities, 1) return sampled_token.squeeze() \ No newline at end of file From 1d9f48286cf7db7e0019ae1da314da12bde5706c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 18:00:11 -0800 Subject: [PATCH 292/589] float long issue --- exo/inference/pytorch/model/utils.py | 68 ++++++++++++++++------------ 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 07b228c61..e17c0fb12 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -83,7 +83,7 @@ # return sampled_token.squeeze() -def sample_logits(logits, temperature=0.0, top_k=0, top_p=1.0): +def sample_logits(ogits, temperature=1.0, top_k=0, top_p=1.0, alpha_f=0.0, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. @@ -92,42 +92,54 @@ def sample_logits(logits, temperature=0.0, top_k=0, top_p=1.0): temperature (float): Temperature for scaling logits. top_k (int): The number of top tokens to consider for sampling. top_p (float): The cumulative probability threshold for nucleus sampling. + alpha_f (float): Penalty factor for repetition frequency. + alpha_p (float): Penalty for repeated selection. Returns: torch.Tensor: The selected token index. 
""" - # Ensure logits are float + # Ensure logits are float logits = logits.float() + # If temperature is very low, just use argmax + if temperature < 1e-6: + return logits.argmax(dim=-1) + + # Alpha sampling (adjusting logits based on past selections) + if alpha_f > 0.0 or alpha_p > 0.0: + logits -= (sample_logits.alpha_counter * alpha_f + (sample_logits.alpha_counter > 0) * alpha_p) + + # Replace NaNs with -inf to prevent softmax issues + logits = torch.where(torch.isnan(logits), torch.full_like(logits, -float('inf')), logits) + # Apply temperature scaling - if temperature == 0.0: - logits = logits.argmax(dim=-1).float() - else: - # Apply Top-k sampling if specified - if top_k > 0: - top_k = min(top_k, logits.size(-1)) - top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) - logits = torch.full_like(logits, float('-inf')) - logits.scatter_(-1, top_k_indices, top_k_values) - - # Apply Top-p (nucleus) sampling if specified - if 0 < top_p < 1.0: - sorted_logits, _ = torch.sort(logits, descending=True, dim=-1) - cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) - - # Remove tokens with cumulative probability above the threshold - sorted_indices_to_remove = cumulative_probs > top_p - sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() - sorted_indices_to_remove[..., 0] = 0 - - sorted_logits[sorted_indices_to_remove] = -float('Inf') - logits = sorted_logits - else: - logits = logits * (1/temperature) - - # Sample from the logits + logits = logits / temperature + + # Top-k sampling + if top_k > 0: + top_k = min(top_k, logits.size(-1)) + top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) + logits = torch.full_like(logits, -float('inf')) + logits.scatter_(-1, top_k_indices, top_k_values) + + # Top-p sampling + if 0 < top_p < 1.0: + sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) + cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) + + # Remove tokens with cumulative probability above the threshold + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() + sorted_indices_to_remove[..., 0] = 0 + + sorted_logits[sorted_indices_to_remove] = -float('inf') + logits = sorted_logits + + # Apply softmax to get probabilities probabilities = F.softmax(logits, dim=-1) + + # Sample from the probabilities sampled_token = torch.multinomial(probabilities, 1) return sampled_token.squeeze() \ No newline at end of file From 2ca9689b014297776b77c44505d05c49d5c0d83e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 18:01:07 -0800 Subject: [PATCH 293/589] float long issue --- exo/inference/pytorch/model/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index e17c0fb12..8ab499fc4 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -83,7 +83,7 @@ # return sampled_token.squeeze() -def sample_logits(ogits, temperature=1.0, top_k=0, top_p=1.0, alpha_f=0.0, alpha_p=0.0): +def sample_logits(logits, temperature=1.0, top_k=0, top_p=1.0, alpha_f=0.0, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. 
From 0b8c9f2d6c01e51b5bf524dd4db10d5522c78d33 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 18:28:14 -0800 Subject: [PATCH 294/589] cleaning up utils.py --- exo/inference/pytorch/model/utils.py | 82 ---------------------------- 1 file changed, 82 deletions(-) diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index 8ab499fc4..d56be5d86 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -1,88 +1,6 @@ import torch from torch.nn import functional as F -# def top_p_sampling(logits, top_p: float, temperature: float = 1.0): -# """ -# Perform top-p sampling (nucleus sampling) on logits. - -# Args: -# logits (torch.Tensor): The logits distribution to sample from. -# top_p (float): The cumulative probability threshold for nucleus sampling. -# temperature (float): Sampling temperature. - -# Returns: -# torch.Tensor: The selected token indices. -# """ -# # Apply temperature scaling -# logits = logits/temperature - -# # Sort the logits in descending order -# sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) - -# # Calculate cumulative probabilities -# cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1) - -# # Create a mask to remove logits with cumulative probability above the threshold -# sorted_indices_to_remove = cumulative_probs > top_p -# sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() -# sorted_indices_to_remove[..., 0] = 0 - -# # Mask the logits -# sorted_logits[sorted_indices_to_remove] = -float('Inf') - -# # Sample from the filtered distribution -# probabilities = torch.softmax(sorted_logits, dim=-1) -# sampled_token = torch.multinomial(probabilities, 1) - -# # Convert to original index order -# return sorted_indices.gather(-1, sampled_token) - -# def sample_logits(logits, temp=0.3, top_k=85, top_p=2.0, alpha_f=0.1, alpha_p=0.0): -# """ -# Sample tokens from logits using temperature, top-k, top-p, and alpha sampling. - -# Args: -# logits (torch.Tensor): The logits distribution to sample from. -# temp (float): Temperature for scaling logits. -# top_k (int): The number of top tokens to consider for sampling. -# top_p (float): The cumulative probability threshold for nucleus sampling. -# alpha_f (float): Penalty factor for repetition. -# alpha_p (float): Penalty for selecting already selected tokens. - -# Returns: -# torch.Tensor: The selected token indices. 
-# """ -# # Return argmax for deterministic output at low temperature -# if temp < 1e-6: -# return logits.argmax(dim=-1) - -# # Apply Top-k sampling if specified -# if top_k > 0: -# top_k = min(top_k, logits.size(-1)) -# top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) -# logits = torch.full_like(logits, float('-inf')) -# logits.scatter_(-1, top_k_indices, top_k_values) - -# # Apply Top-p (nucleus) sampling if specified -# if 0 < top_p < 1.0: -# logits = top_p_sampling(logits, top_p, temp) - -# # Apply alpha sampling to discourage repetition -# if alpha_f > 0.0 or alpha_p > 0.0: -# if not hasattr(sample_logits, "alpha_counter"): -# sample_logits.alpha_counter = torch.zeros_like(logits, dtype=torch.int32) -# logits = logits - (sample_logits.alpha_counter * alpha_f + (sample_logits.alpha_counter > 0) * alpha_p) - -# # Sample from the logits -# probabilities = F.softmax(logits, dim=-1) -# sampled_token = torch.multinomial(probabilities, 1) - -# # Update alpha counter -# if alpha_f > 0.0 or alpha_p > 0.0: -# sample_logits.alpha_counter.scatter_(-1, sampled_token, sample_logits.alpha_counter.gather(-1, sampled_token) + 1) - -# return sampled_token.squeeze() - def sample_logits(logits, temperature=1.0, top_k=0, top_p=1.0, alpha_f=0.0, alpha_p=0.0): """ Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. From 94de83f59111793875f2045796fef8652139a52b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 10 Aug 2024 18:29:55 -0800 Subject: [PATCH 295/589] removing broken llama.py --- exo/inference/pytorch/model/llama.py | 56 ---------------------------- 1 file changed, 56 deletions(-) delete mode 100644 exo/inference/pytorch/model/llama.py diff --git a/exo/inference/pytorch/model/llama.py b/exo/inference/pytorch/model/llama.py deleted file mode 100644 index f6427e025..000000000 --- a/exo/inference/pytorch/model/llama.py +++ /dev/null @@ -1,56 +0,0 @@ -import torch -import torch.nn as nn -from transformers.models.llama.modeling_llama import LlamaForCausalLM -from exo.inference.shard import Shard - -class ShardedLLAMAModel(nn.Module): - def __init__(self, model_path: str, shard: Shard): - super(ShardedLLAMAModel, self).__init__() - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - self.shard = shard - - # Load the full model - self.full_model = LlamaForCausalLM.from_pretrained(model_path) - self.full_model.to(self.device) - - # Extract only the layers for this shard - self.layers = nn.ModuleList([ - self.full_model.model.layers[i] for i in range(shard.start_layer, shard.end_layer + 1) - ]) - - # Embeddings and final layer norm - self.embed_tokens = self.full_model.model.embed_tokens - self.embed_positions = self.full_model.model.embed_positions - self.norm = self.full_model.model.norm - self.lm_head = self.full_model.lm_head - - def forward_layers(self, input_ids, past_key_values=None): - """ - Forward pass through the specified layers. - - Args: - input_ids (torch.Tensor): Input token IDs. - past_key_values (list, optional): Past key values for caching. - - Returns: - tuple: Hidden states and new past key values. 
- """ - if past_key_values is None: - past_key_values = [None] * len(self.layers) - - # Token and position embeddings - hidden_states = self.embed_tokens(input_ids) + self.embed_positions(input_ids) - - # Apply each layer in this shard - new_past_key_values = [] - for i, layer in enumerate(self.layers): - layer_past = past_key_values[i] - hidden_states, new_layer_past = layer(hidden_states, past_key_values=layer_past, use_cache=True) - new_past_key_values.append(new_layer_past) - - if self.shard.is_last_layer(): - hidden_states = self.norm(hidden_states) - logits = self.lm_head(hidden_states) - return logits, new_past_key_values - else: - return hidden_states, new_past_key_values From 226a0acd8149184d38e365fc7d7e49960a896f13 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 24 Aug 2024 18:23:39 -0800 Subject: [PATCH 296/589] removing unittest, update inference return type, fixing converting tensor to numpy --- exo/inference/pytorch/README.md | 18 ++++++++++++++++ exo/inference/pytorch/inference.py | 13 ++++++++---- exo/inference/pytorch/model/hf.py | 7 +++---- .../pytorch/test_build_transformer.py | 21 ------------------- .../pytorch/test_inference_engine.py | 6 ++---- 5 files changed, 32 insertions(+), 33 deletions(-) create mode 100644 exo/inference/pytorch/README.md delete mode 100644 exo/inference/pytorch/test_build_transformer.py diff --git a/exo/inference/pytorch/README.md b/exo/inference/pytorch/README.md new file mode 100644 index 000000000..8cb0ce076 --- /dev/null +++ b/exo/inference/pytorch/README.md @@ -0,0 +1,18 @@ +# PyTorch & HuggingFace inference engine +Experimental, still under development + + +## Install +Install needed py modules, make sure to be using CUDA 12.4 for the PyTorch install + +```console +$ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 +$ pip install transformers accelerate +``` + +After installing accelerate you get hit with a dependency error, for now ignore until we can fix this as exo works fine with 1.26.4 + +```console +ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. +exo 0.0.1 requires numpy==2.0.0, but you have numpy 1.26.4 which is incompatible. 
+``` \ No newline at end of file diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index a8c72f27c..dd6434caa 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -63,11 +63,14 @@ async def infer_prompt( if DEBUG >= 2: print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") - print(f"output_data.item() {output_data.item()}") + print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") print(f"output_data[-1] {output_data[-1]}") - print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") + + if output_data.size == 1: + print(f"size 1 output_data.item() {output_data.item()}") + print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") return ( output_data, @@ -104,11 +107,13 @@ async def infer_tensor( if DEBUG >= 2: print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") - print(f"output_data.item() {output_data.item()}") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") print(f"output_data[-1] {output_data[-1]}") - print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") + + if output_data.size == 1: + print(f"size 1 output_data.item() {output_data.item()}") + print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") return ( output_data, diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index bbfa0d906..96f573b7d 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -19,8 +19,6 @@ def __init__(self, shard: Shard): self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard - - # Load the model self.full_model = AutoModelForCausalLM.from_pretrained( shard.model_id, @@ -53,9 +51,10 @@ def __init__(self, shard: Shard): def forward_layers( self, input_data: torch.tensor - ) -> Tuple[np.ndarray, list]: + ) -> np.ndarray: """ Forward pass through the specified layers. 
+ This is without caching Note: past_key_values not working for model, might be a library bug """ @@ -104,4 +103,4 @@ def forward_layers( return output_token - return hidden_states.cpu().numpy() + return hidden_states.cpu().detach().numpy() \ No newline at end of file diff --git a/exo/inference/pytorch/test_build_transformer.py b/exo/inference/pytorch/test_build_transformer.py deleted file mode 100644 index cdbfa6fc4..000000000 --- a/exo/inference/pytorch/test_build_transformer.py +++ /dev/null @@ -1,21 +0,0 @@ -import unittest -from unittest.mock import patch, MagicMock -from pathlib import Path -import torch -from exo.inference.shard import Shard -from exo.inference.pytorch.helpers import build_transformer - -class TestBuildTransformer(unittest.TestCase): - - def test_build_transformer(self): - # Call the build_transformer function - model = build_transformer( - "gpt2", - quantize=True, - device="cuda" - ) - - self.assertIsNotNone(model) - -if __name__ == '__main__': - unittest.main() diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index fbc314f08..f1eaf31eb 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -8,12 +8,10 @@ def main(): model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, end_layer=0, - n_layers=12 + n_layers=32 ) - engine = PyTorchDynamicShardInferenceEngine( - shard - ) + engine = PyTorchDynamicShardInferenceEngine() # Prepare the prompt From e11bebd7f534391f825a5df1aab3fe5bba0ec604 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 24 Aug 2024 20:53:30 -0800 Subject: [PATCH 297/589] adding nvidia quadro and t1000 support --- exo/topology/device_capabilities.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exo/topology/device_capabilities.py b/exo/topology/device_capabilities.py index 6b8de77f1..ba81a08b3 100644 --- a/exo/topology/device_capabilities.py +++ b/exo/topology/device_capabilities.py @@ -97,6 +97,9 @@ def to_dict(self): "NVIDIA A800 80GB PCIE": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), "NVIDIA A100 80GB SXM": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), "NVIDIA A800 80GB SXM": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), + "NVIDIA T1000 8GB": DeviceFlops(fp32=2.5 * TFLOPS, fp16=5.0 * TFLOPS, int8=10.0 * TFLOPS), + "Quadro M2000": DeviceFlops(fp32=0.5 * TFLOPS, fp16=1.0 * TFLOPS, int8=2.0 * TFLOPS), + "Quadro P400": DeviceFlops(fp32=0.641 * TFLOPS, fp16=1.282 * TFLOPS, int8=2.564 * TFLOPS), # ... add more devices if needed ... 
### AMD GPUs # RX 6000 series From 778cb6ef03bc3f2451820006c21dd9a4fa2688e7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 24 Aug 2024 21:06:14 -0800 Subject: [PATCH 298/589] updating test, updating model selection for smaller quant llama3 model --- exo/inference/pytorch/test_inference_engine.py | 2 +- exo/models.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index f1eaf31eb..b1e5b56ac 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -19,7 +19,7 @@ def main(): # Run inference loop = asyncio.get_event_loop() - output_data, new_inference_state, is_eos = loop.run_until_complete( + output_data, _, _ = loop.run_until_complete( engine.infer_prompt( request_id="test_request", shard=shard, prompt=prompt ) diff --git a/exo/models.py b/exo/models.py index d355e88de..1ad4df21b 100644 --- a/exo/models.py +++ b/exo/models.py @@ -5,6 +5,7 @@ "llama-3.1-8b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), "TinygradDynamicShardInferenceEngine": Shard(model_id="mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", start_layer=0, end_layer=0, n_layers=32), + "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, end_layer=0, n_layers=32), }, "llama-3.1-70b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), @@ -19,6 +20,10 @@ "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", start_layer=0, end_layer=0, n_layers=80), }, + "llama-3-2B-Base": { + "TinygradDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=32), + "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=32), + }, ### mistral "mistral-nemo": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40),}, "mistral-large": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Large-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=88),}, From 56aae50cac1ff6bfdb18d5d4b8d67b69ccb53db6 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 24 Aug 2024 21:41:21 -0800 Subject: [PATCH 299/589] added updating model options to update_deps.py --- tinychat/examples/tinychat/index.html | 24 +++++++++---------- tinychat/examples/tinychat/update_deps.py | 29 ++++++++++++++++++++++- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/tinychat/examples/tinychat/index.html b/tinychat/examples/tinychat/index.html index 6136864f2..b437b0982 100644 --- a/tinychat/examples/tinychat/index.html +++ b/tinychat/examples/tinychat/index.html @@ -18,8 +18,8 @@ - - + + @@ -27,16 +27,16 @@
+ + + + + + + + + +
Date: Sat, 24 Aug 2024 21:52:38 -0800 Subject: [PATCH 300/589] updating inference class init to take shard, updating pytorch test_inference_engine.py, adding in pytorch option for inference engine --- exo/inference/pytorch/inference.py | 4 +- .../pytorch/test_inference_engine.py | 52 +++++++++++++++++-- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index dd6434caa..cc33b6bfe 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -17,14 +17,14 @@ class PyTorchDynamicShardInferenceEngine(InferenceEngine): PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. """ - def __init__(self): + def __init__(self, shard): """ Initialize the inference engine. Args: debug (bool): If True, enables debug logging. Defaults to False. """ - self.shard = None + self.shard = shard self.model = None self.tokenizer = None self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index b1e5b56ac..ffb5a10fb 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -2,8 +2,48 @@ import asyncio from exo.inference.shard import Shard from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine +from exo.download.hf.hf_shard_download import HFShardDownloader +from exo.inference.inference_engine import InferenceEngine +from exo.inference.shard import Shard +from exo.helpers import DEBUG +import os +import numpy as np + +async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str): + prompt = "In a single word only, what is the last name of the current president of the USA?" 
+ resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt("A", shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), prompt=prompt) + next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( + "A", + shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), + input_data=resp_full, + inference_state=inference_state_full, + ) -def main(): + pp = 15 + resp1, inference_state_1, _ = await inference_engine_1.infer_prompt("B", shard=Shard(model_id=model_id, start_layer=0, end_layer=pp, n_layers=32), prompt=prompt) + resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( + "B", + shard=Shard(model_id=model_id, start_layer=pp + 1, end_layer=31, n_layers=32), + input_data=resp1, + inference_state=inference_state_1, + ) + resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( + "B", + shard=Shard(model_id=model_id, start_layer=0, end_layer=pp, n_layers=32), + input_data=resp2, + inference_state=inference_state_2, + ) + resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( + "B", + shard=Shard(model_id=model_id, start_layer=pp + 1, end_layer=31, n_layers=32), + input_data=resp3, + inference_state=inference_state_3, + ) + + assert np.array_equal(resp_full, resp2) + assert np.array_equal(next_resp_full, resp4) + +def single_test(): shard = Shard( model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, @@ -11,7 +51,7 @@ def main(): n_layers=32 ) - engine = PyTorchDynamicShardInferenceEngine() + engine = PyTorchDynamicShardInferenceEngine(shard) # Prepare the prompt @@ -28,4 +68,10 @@ def main(): assert output_data is not None if __name__ == '__main__': - main() + # single_test() + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "andrijdavid/Llama3-2B-Base", + )) + From aa769cae4b9c1e71072ace3640b96e8470e6713b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 24 Aug 2024 21:55:07 -0800 Subject: [PATCH 301/589] adding updates for inference_engine.py --- exo/inference/inference_engine.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exo/inference/inference_engine.py b/exo/inference/inference_engine.py index b94654932..ad612c758 100644 --- a/exo/inference/inference_engine.py +++ b/exo/inference/inference_engine.py @@ -27,5 +27,8 @@ def get_inference_engine(inference_engine_name: str, shard_downloader: 'ShardDow tinygrad.helpers.DEBUG.value = int(os.getenv("TINYGRAD_DEBUG", default="0")) return TinygradDynamicShardInferenceEngine(shard_downloader) + elif inference_engine_name == "pytorch": + from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine + return PyTorchDynamicShardInferenceEngine(shard_downloader) else: raise ValueError(f"Inference engine {inference_engine_name} not supported") From 08e8b41895255ba12ac873fc060d2a27b79bb747 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 24 Aug 2024 22:05:18 -0800 Subject: [PATCH 302/589] reducing layer amount for llama3-2b-base --- exo/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/models.py b/exo/models.py index 1ad4df21b..0b7b48d60 100644 --- a/exo/models.py +++ b/exo/models.py @@ -21,8 +21,8 @@ "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", start_layer=0, end_layer=0, n_layers=80), }, "llama-3-2B-Base": { - "TinygradDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", 
start_layer=0, end_layer=0, n_layers=32), - "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=32), + "TinygradDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=5), + "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=5), }, ### mistral "mistral-nemo": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40),}, From dd2812b81642cf909f8fd04c321dbaefb27b286d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 25 Aug 2024 14:20:06 -0800 Subject: [PATCH 303/589] fixing gpu tensor to numpy conversion issues, updating top_p_sampling along with adding in torch topk, added in better random distribution selection for when top_p is too low or high, started work on forward_layer_cached but infer functions need to be changed and take any and not a string --- exo/api/chatgpt_api.py | 21 +++- exo/inference/pytorch/inference.py | 27 +++-- exo/inference/pytorch/model/hf.py | 140 +++++++++++++++++++++----- exo/inference/pytorch/model/utils.py | 102 +++++++++++-------- exo/models.py | 11 +- tinychat/examples/tinychat/index.html | 15 +-- 6 files changed, 226 insertions(+), 90 deletions(-) diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py index d9af9458b..017d71c75 100644 --- a/exo/api/chatgpt_api.py +++ b/exo/api/chatgpt_api.py @@ -113,8 +113,27 @@ def remap_messages(messages: List[Message]) -> List[Message]: def build_prompt(tokenizer, _messages: List[Message]): + if len(_messages) == 1: + user_msg = _messages[0] + + # get instruct sys message + sys_msg = Message(role="system", content="You are a helpful assistant.") + + # restructure for sys_msg to go first + _messages = [sys_msg, user_msg] + messages = remap_messages(_messages) - prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + prompt = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + + if DEBUG >= 3: + print(f"prompt: {str(prompt)}") + for msg in messages: + print(f"chat role: {msg.role}\ncontent: {msg.content}") + image_str = None for message in messages: if not isinstance(message.content, list): diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index cc33b6bfe..01a80d326 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -37,9 +37,6 @@ async def infer_prompt( image_str: Optional[str] = None, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: - if DEBUG >= 2: - print("infer_prompt called") - await self.ensure_shard(shard) # need to make this so inference_state is not a string @@ -47,9 +44,6 @@ async def infer_prompt( tokens = self.tokenizer.encode(prompt, return_tensors="pt") - if DEBUG >= 2: - print(f"tokens: {tokens}\n") - output_data = self.model.forward_layers( tokens ) @@ -60,7 +54,9 @@ async def infer_prompt( print(f"token from llm decode: {self.tokenizer.decode(output_data)}") - if DEBUG >= 2: + if DEBUG >= 4: + print("infer_prompt called") + print(f"tokens: {tokens}\n") print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") @@ -91,7 +87,7 @@ async def infer_tensor( if in_tensor.dim() == 1: in_tensor = in_tensor.unsqueeze(0) # Add a batch dimension: [1, seq_len] - if DEBUG >= 2: + if DEBUG >= 4: print("infer_tensor called") 
print(f"input_data: {input_data}\n") print(f"in_tensor: {in_tensor}\n") @@ -104,7 +100,7 @@ async def infer_tensor( is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] - if DEBUG >= 2: + if DEBUG >= 4: print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") print(f"finished: {is_finished}") @@ -131,12 +127,21 @@ async def ensure_shard(self, shard: Optional[Shard]): if self.shard == shard: return - if DEBUG >= 2: + if DEBUG >= 4: print(f"Loading new shard: {shard}") + # if self.model: + # if DEBUG >= 2: + # print(f"\nCLEARING MODEL {self.shard.model_id}\n") + + # # delete model and free up memory to reload + # self.model.cpu() + # del self.model + # torch.cuda.empty_cache() + self.model = ShardedHuggingFaceModel(shard) self.tokenizer = await resolve_tokenizer(shard.model_id) self.shard = shard - if DEBUG >= 2: + if DEBUG >= 4: print(f"Shard loaded successfully: {shard}") \ No newline at end of file diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 96f573b7d..c20649218 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -2,29 +2,45 @@ import torch.nn as nn import numpy as np -from transformers import AutoModelForCausalLM, LlamaConfig, DynamicCache, Cache +from transformers import AutoModelForCausalLM, BitsAndBytesConfig, DynamicCache, Cache from exo.inference.shard import Shard from exo.helpers import DEBUG from typing import Tuple from .utils import sample_logits +TOP_P = 0.75 #0.95 +TOP_K = 20 +TEMP = 0.8 + class ShardedHuggingFaceModel(torch.nn.Module): def __init__(self, shard: Shard): super(ShardedHuggingFaceModel, self).__init__() - if DEBUG >= 2: - print(f"\nShardedHuggingFaceModel init with shard {shard}") + if torch.cuda.is_available(): + self.device = torch.device("cuda") + else: + self.device = torch.device("cpu") - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.shard = shard # Load the model - self.full_model = AutoModelForCausalLM.from_pretrained( - shard.model_id, - torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, - device_map="auto" - ) + try: + self.full_model = AutoModelForCausalLM.from_pretrained( + shard.model_id, + torch_dtype="auto", + device_map="auto", + # offload_buffers=True + ) + # .to(self.device) + except Exception as err: + print(f"Error loading model: {err}") + raise + + if DEBUG >= 2: + print(f"\nShardedHuggingFaceModel init with shard {shard}") + print(f"self.full_model: {self.full_model}") + print(f"self.full_model.model: {self.full_model.model}") # using llamaconfig not working setting layers manually layers = [] @@ -37,6 +53,7 @@ def __init__(self, shard: Shard): layers.append(layer) self.full_model.model.layers = nn.ModuleList(layers) + # .to(self.device) if DEBUG >= 2: print(f"full_model.model layer: {len(self.full_model.model.layers)}") @@ -46,8 +63,6 @@ def __init__(self, shard: Shard): self.embed_tokens = self.full_model.model.embed_tokens self.norm = self.full_model.model.norm - # self.past_key_values = DynamicCache() - def forward_layers( self, input_data: torch.tensor @@ -69,23 +84,98 @@ def forward_layers( if DEBUG >= 2: print(f"\n[layer model] {self.full_model.model}") print(f"IN hidden_states {hidden_states}") - # print(f"past_kvs {past_kvs}") - self.full_model.model.layer_idx = 5 + layer_outputs = self.full_model.model( + hidden_states.to(self.device), + use_cache=False + ) + + if DEBUG >= 2: + print(f"OUT hidden_states 
{layer_outputs.last_hidden_state}") + + hidden_states = layer_outputs.last_hidden_state + + print(f"2 is_last_layer {self.shard.is_last_layer()}") + if self.shard.is_last_layer(): + hs_norm = self.norm(hidden_states) + hs_lm_head = self.full_model.lm_head(hs_norm).float() + + # Use the sampling function with default settings + with torch.no_grad(): + output_token = sample_logits( + hs_lm_head[:, -1, :], + TEMP, + TOP_P, + TOP_K + ).cpu().numpy().flatten() + + if DEBUG >= 2: + print(f"hs_norm: {hs_norm}") + print(f"hs_lm_head: {hs_lm_head}") + print(f"output_token: {output_token}") + + return output_token + + return hidden_states.cpu().numpy() + + def forward_layers_cached( + self, + input_data: torch.tensor, + past_kvs: Cache = DynamicCache() + ) -> Tuple[np.ndarray, list]: + """ + Forward pass through the specified layers. + With caching + + Note: past_key_values not working for model, might be a library bug + """ + if DEBUG >= 2: + print("forward_layer call") + print(f"input_data: {input_data}") + print(f"shard {self.shard.to_dict()}") + + hidden_states = input_data + position_ids = None + position_embeddings = None + + if self.shard.is_first_layer(): + hidden_states = self.embed_tokens(hidden_states) + + if DEBUG >= 2: + print(f"hidden_states: {hidden_states}") + print(f"hidden_states.size(): {hidden_states.size()}") + + batch_size, seq_len = input_data.size() + position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) + + position_embeddings = self.full_model.model.rotary_emb( + hidden_states, + position_ids + ) + + # if DEBUG >= 2: + # print(f"embedded hidden_states {hidden_states}") + # print(f"position_ids: {position_embeddings}") + + + # Forward pass through the layer + if DEBUG >= 2: + print(f"IN hidden_states {hidden_states}") + print(f"past_kvs {past_kvs}") + layer_outputs = self.full_model.model( hidden_states, - # position_ids=position_ids, - # inputs_embeds=position_embeddings, - # past_key_values=self.past_key_values, - use_cache=False # not enough vram for using cache ;_; + position_ids=position_ids, + inputs_embeds=position_embeddings, + past_key_values=past_kvs, + use_cache=True ) if DEBUG >= 2: - print(f"OUT hidden_states {hidden_states}") - # print(f"\nlayer_outputs: {layer_outputs}") + print(f"\nlayer_outputs: {layer_outputs}") hidden_states = layer_outputs.last_hidden_state - # self.past_key_values = layer_outputs.past_key_values + present_kvs = layer_outputs.past_key_values print(f"2 is_last_layer {self.shard.is_last_layer()}") if self.shard.is_last_layer(): @@ -94,13 +184,17 @@ def forward_layers( # Use the sampling function with default settings output_token = sample_logits( - hs_lm_head[:, -1, :]).cpu().numpy().flatten() + hs_lm_head[:, -1, :], + TEMP, + TOP_P, + TOP_K + ).numpy() if DEBUG >= 2: print(f"hs_norm: {hs_norm}") print(f"hs_lm_head: {hs_lm_head}") print(f"output_token: {output_token}") - return output_token + return (output_token, present_kvs) - return hidden_states.cpu().detach().numpy() \ No newline at end of file + return (hidden_states.numpy(), present_kvs) \ No newline at end of file diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/utils.py index d56be5d86..df84b3977 100644 --- a/exo/inference/pytorch/model/utils.py +++ b/exo/inference/pytorch/model/utils.py @@ -1,17 +1,59 @@ import torch from torch.nn import functional as F -def sample_logits(logits, temperature=1.0, top_k=0, top_p=1.0, alpha_f=0.0, alpha_p=0.0): +def top_p_sampling(scaled_logits: torch.Tensor, 
top_p: float) -> torch.Tensor: + """ + Apply top-p (nucleus) sampling to logits. + + Args: + scaled_logits (torch.Tensor): The scaled logits from the model's output. + top_p (float): The cumulative probability threshold for top-p filtering. + temp (float): Temperature parameter for softmax distribution reshaping. + + Returns: + torch.Tensor: Token selected based on the top-p criterion. + + Ref: + https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/sample_utils.py#L67C1-L97C17 + """ + scaled_logits = torch.where(torch.isnan(scaled_logits), torch.zeros_like(scaled_logits), scaled_logits) + scaled_logits = torch.where(torch.isinf(scaled_logits), torch.full_like(scaled_logits, 1e6), scaled_logits) + + probs = torch.softmax(scaled_logits, dim=-1) + + sorted_probs, sorted_indices = torch.sort( + probs, + descending=True, + dim=-1 + ) + + cumulative_probs = torch.cumsum(sorted_probs, dim=-1) + mask = cumulative_probs > top_p + + top_probs = torch.where(mask, torch.zeros_like(sorted_probs), sorted_probs) + sum_probs = top_probs.sum(dim=-1, keepdim=True) + top_probs = torch.where(sum_probs > 0, top_probs / sum_probs, torch.ones_like(top_probs) / top_probs.size(-1)) + + if torch.isnan(top_probs).any() or torch.isinf(top_probs).any(): + print("Warning: Top probabilities contain NaN or Inf values after normalization") + top_probs = torch.where(torch.isnan(top_probs) | torch.isinf(top_probs), + 1.0 / top_probs.size(-1), + top_probs) + + sorted_token = torch.multinomial(top_probs, num_samples=1) + + token = sorted_indices.gather(-1, sorted_token) + + return token.squeeze(-1) + +def sample_logits(logits, temp, top_p, top_k): """ Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. Args: logits (torch.Tensor): The logits distribution to sample from. - temperature (float): Temperature for scaling logits. - top_k (int): The number of top tokens to consider for sampling. + temp (float): temp for scaling logits. top_p (float): The cumulative probability threshold for nucleus sampling. - alpha_f (float): Penalty factor for repetition frequency. - alpha_p (float): Penalty for repeated selection. Returns: torch.Tensor: The selected token index. 
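# Sanity-check toy run of the nucleus filter above, in plain PyTorch and
# independent of the exo code: it shows which tokens survive a top_p = 0.75
# cutoff and how the survivors are renormalised before multinomial sampling.
import torch

probs = torch.tensor([[0.50, 0.25, 0.15, 0.07, 0.03]])  # already a softmax output
sorted_probs, sorted_idx = torch.sort(probs, descending=True, dim=-1)
cumulative = torch.cumsum(sorted_probs, dim=-1)          # [0.50, 0.75, 0.90, 0.97, 1.00]
mask = cumulative > 0.75                                 # True from the third token on
kept = torch.where(mask, torch.zeros_like(sorted_probs), sorted_probs)
kept = kept / kept.sum(dim=-1, keepdim=True)             # [0.667, 0.333, 0, 0, 0]
token = sorted_idx.gather(-1, torch.multinomial(kept, num_samples=1))
print(token)                                             # index 0 or 1, never the tail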
@@ -20,44 +62,22 @@ def sample_logits(logits, temperature=1.0, top_k=0, top_p=1.0, alpha_f=0.0, alph # Ensure logits are float logits = logits.float() - # If temperature is very low, just use argmax - if temperature < 1e-6: + # If temp is very low, just use argmax + if temp == 0: return logits.argmax(dim=-1) + + scaled_logits = logits/temp - # Alpha sampling (adjusting logits based on past selections) - if alpha_f > 0.0 or alpha_p > 0.0: - logits -= (sample_logits.alpha_counter * alpha_f + (sample_logits.alpha_counter > 0) * alpha_p) - - # Replace NaNs with -inf to prevent softmax issues - logits = torch.where(torch.isnan(logits), torch.full_like(logits, -float('inf')), logits) - - # Apply temperature scaling - logits = logits / temperature - - # Top-k sampling + # top k if top_k > 0: - top_k = min(top_k, logits.size(-1)) - top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1) - logits = torch.full_like(logits, -float('inf')) - logits.scatter_(-1, top_k_indices, top_k_values) - + top_values, top_indices = torch.topk(scaled_logits, top_k, dim=-1) + scaled_logits = torch.zeros_like(logits).scatter_(-1, top_indices, top_values) + # Top-p sampling if 0 < top_p < 1.0: - sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) - cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) - - # Remove tokens with cumulative probability above the threshold - sorted_indices_to_remove = cumulative_probs > top_p - sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() - sorted_indices_to_remove[..., 0] = 0 - - sorted_logits[sorted_indices_to_remove] = -float('inf') - logits = sorted_logits - - # Apply softmax to get probabilities - probabilities = F.softmax(logits, dim=-1) - - # Sample from the probabilities - sampled_token = torch.multinomial(probabilities, 1) - - return sampled_token.squeeze() \ No newline at end of file + return top_p_sampling(scaled_logits, top_p) + else: + # random distribution selection + probs = torch.softmax(scaled_logits, dim=-1) + rand_sample = torch.distributions.Categorical(probs) + return rand_sample.sample().squeeze() \ No newline at end of file diff --git a/exo/models.py b/exo/models.py index 0b7b48d60..72a5b5667 100644 --- a/exo/models.py +++ b/exo/models.py @@ -21,8 +21,10 @@ "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", start_layer=0, end_layer=0, n_layers=80), }, "llama-3-2B-Base": { - "TinygradDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=5), - "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=5), + "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=6), + }, + "llama-3-1B-Base": { + "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-1B-Base", start_layer=0, end_layer=0, n_layers=3), }, ### mistral "mistral-nemo": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40),}, @@ -31,4 +33,9 @@ "deepseek-coder-v2-lite": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", start_layer=0, end_layer=0, n_layers=27),}, ### llava "llava-1.5-7b-hf": {"MLXDynamicShardInferenceEngine": Shard(model_id="llava-hf/llava-1.5-7b-hf", start_layer=0, end_layer=0, n_layers=32),}, + ### qwen + 
"Qwen2-0.5B-Instruct": { + "PyTorchDynamicShardInferenceEngine": Shard(model_id="Qwen/Qwen2-0.5B-Instruct", start_layer=0, end_layer=0, n_layers=24), + }, + } diff --git a/tinychat/examples/tinychat/index.html b/tinychat/examples/tinychat/index.html index b437b0982..8ff4c64c7 100644 --- a/tinychat/examples/tinychat/index.html +++ b/tinychat/examples/tinychat/index.html @@ -19,24 +19,15 @@ - + - +
Tuple[np.ndarray, str, bool]: + await self.ensure_shard(shard) # need to make this so inference_state is not a string @@ -44,16 +52,27 @@ async def infer_prompt( tokens = self.tokenizer.encode(prompt, return_tensors="pt") - output_data = self.model.forward_layers( - tokens - ) + if self.use_cache: + # convert inference_state or cache from json to DynamicCache + past_kv = DynamicCache() + if inference_state != None: + cache_dict = json.loads(inference_state) + past_kv.key_cache = [torch.tensor(data) for data in cache_dict['key_cache']] + past_kv.value_cache = [torch.tensor(data) for data in cache_dict['value_cache']] + + output_data, current_kvs = self.model.forward( + tokens, + past_kv, + use_cache=True + ) + else: + output_data = self.model.forward( + tokens, + use_cache=False + ) is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] - if is_finished: - print(f"token from llm decode: {self.tokenizer.decode(output_data)}") - - if DEBUG >= 4: print("infer_prompt called") print(f"tokens: {tokens}\n") @@ -68,9 +87,17 @@ async def infer_prompt( print(f"size 1 output_data.item() {output_data.item()}") print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") + if self.use_cache: + # legacy_cache = current_kvs.to_legacy_cache() + print(current_kvs.key_cache) + cache_dict = { + 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], + 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] + } + return ( output_data, - "", + json.dumps(cache_dict) if self.use_cache else "", is_finished ) @@ -79,28 +106,38 @@ async def infer_tensor( request_id: str, shard: Shard, input_data: np.ndarray, - inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: - - in_tensor = torch.tensor(input_data) - - # Ensure input_data is 2D: [batch_size, seq_len] - if in_tensor.dim() == 1: - in_tensor = in_tensor.unsqueeze(0) # Add a batch dimension: [1, seq_len] - - if DEBUG >= 4: - print("infer_tensor called") - print(f"input_data: {input_data}\n") - print(f"in_tensor: {in_tensor}\n") + inference_state: Optional[str] = None + ) -> Tuple[np.ndarray, str, bool]: await self.ensure_shard(shard) - output_data = self.model.forward_layers( - in_tensor - ) + in_tensor = torch.tensor(input_data) + + if self.use_cache: + # convert inference_state or cache from json to DynamicCache + past_kv = DynamicCache() + if inference_state != None: + cache_dict = json.loads(inference_state) + past_kv.key_cache = [torch.tensor(data) for data in cache_dict['key_cache']] + past_kv.value_cache = [torch.tensor(data) for data in cache_dict['value_cache']] + + output_data, current_kvs = self.model.forward( + in_tensor, + past_kv, + use_cache=True + ) + else: + output_data = self.model.forward( + in_tensor, + use_cache=False + ) is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 4: + print("infer_tensor called") + print(f"input_data: {input_data}\n") + print(f"in_tensor: {in_tensor}\n") print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") print(f"finished: {is_finished}") @@ -111,9 +148,16 @@ async def infer_tensor( print(f"size 1 output_data.item() {output_data.item()}") print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") + if self.use_cache: + legacy_cache = current_kvs.to_legacy_cache() + cache_dict = { + 'key_cache': [tensor.tolist() for tensor in 
legacy_cache.key_cache], + 'value_cache': [tensor.tolist() for tensor in legacy_cache.value_cache] + } + return ( output_data, - "", + json.dumps(cache_dict) if self.use_cache else "", is_finished ) @@ -139,9 +183,9 @@ async def ensure_shard(self, shard: Optional[Shard]): # del self.model # torch.cuda.empty_cache() - self.model = ShardedHuggingFaceModel(shard) - self.tokenizer = await resolve_tokenizer(shard.model_id) self.shard = shard + self.tokenizer = await resolve_tokenizer(shard.model_id) + self.model = ShardedHuggingFaceModel(shard, self.tokenizer) if DEBUG >= 4: print(f"Shard loaded successfully: {shard}") \ No newline at end of file diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index c20649218..a83454663 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,11 +1,12 @@ import torch import torch.nn as nn import numpy as np +import re from transformers import AutoModelForCausalLM, BitsAndBytesConfig, DynamicCache, Cache from exo.inference.shard import Shard from exo.helpers import DEBUG -from typing import Tuple +from typing import Tuple, Optional, Union, List from .utils import sample_logits @@ -14,7 +15,7 @@ TEMP = 0.8 class ShardedHuggingFaceModel(torch.nn.Module): - def __init__(self, shard: Shard): + def __init__(self, shard: Shard, tokenizer: any): super(ShardedHuggingFaceModel, self).__init__() if torch.cuda.is_available(): @@ -23,178 +24,282 @@ def __init__(self, shard: Shard): self.device = torch.device("cpu") self.shard = shard + self.tokenizer = tokenizer # Load the model try: - self.full_model = AutoModelForCausalLM.from_pretrained( + self.llm_model = AutoModelForCausalLM.from_pretrained( shard.model_id, torch_dtype="auto", device_map="auto", # offload_buffers=True ) - # .to(self.device) + + self.base_model = self.llm_model.model except Exception as err: print(f"Error loading model: {err}") raise if DEBUG >= 2: print(f"\nShardedHuggingFaceModel init with shard {shard}") - print(f"self.full_model: {self.full_model}") - print(f"self.full_model.model: {self.full_model.model}") + print(f"self.llm_model: {self.llm_model}") + print(f"self.llm_model.model: {self.llm_model.model}") - # using llamaconfig not working setting layers manually + # load layers from base model to use layers = [] for i in range(shard.start_layer, shard.end_layer + 1): - layer = self.full_model.model.layers[i] + layer = self.llm_model.model.layers[i] if DEBUG >= 2: print(f"Loading layers[{i}]") layers.append(layer) - self.full_model.model.layers = nn.ModuleList(layers) - # .to(self.device) + self.layers = nn.ModuleList(layers).to(self.device) if DEBUG >= 2: - print(f"full_model.model layer: {len(self.full_model.model.layers)}") + print(f"full_model.model layer: {len(self.llm_model.model.layers)}") # Embeddings and final layer norm # used for doing what forward LlamaModel does in transformers - self.embed_tokens = self.full_model.model.embed_tokens - self.norm = self.full_model.model.norm - - def forward_layers( + self.norm = self.llm_model.model.norm + self.lm_head = self.llm_model.lm_head + + def forward( self, - input_data: torch.tensor - ) -> np.ndarray: + input_ids: torch.tensor, + past_kvs: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + use_cache: bool = True + ) -> Tuple[np.ndarray, any]: """ - Forward pass through the specified layers. 
- This is without caching + Forward through layers using the base model - Note: past_key_values not working for model, might be a library bug - """ - if DEBUG >= 2: - print("forward_layer call") - print(f"input_data: {input_data}") - print(f"shard {self.shard.to_dict()}") + Args: + input_ids: tensor input + past_kvs: past key value stores for cache + use_cache: use cache + + Returns: + hidden_states: numpy of states between layers + or logits: numpy of normalization and linearization of last hidden state + past_kvs: DynamicCache of past key values if use_cache is true + + Ref: + https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/qwen2/modeling_qwen2.py#L804 + https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/llama/modeling_llama.py#L887 + """ - hidden_states = input_data + if self.shard.is_first_layer(): + inputs_embeds = self.base_model.embed_tokens(input_ids.to(self.device)) - # Forward pass through the layer - if DEBUG >= 2: - print(f"\n[layer model] {self.full_model.model}") - print(f"IN hidden_states {hidden_states}") - - layer_outputs = self.full_model.model( - hidden_states.to(self.device), - use_cache=False - ) + if use_cache: + past_kvs = DynamicCache.from_legacy_cache(past_kvs) - if DEBUG >= 2: - print(f"OUT hidden_states {layer_outputs.last_hidden_state}") - - hidden_states = layer_outputs.last_hidden_state + past_seen_tokens = past_kvs.get_seq_length() if past_kvs is not None else 0 + cache_position = torch.arange( + past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device + ) - print(f"2 is_last_layer {self.shard.is_last_layer()}") - if self.shard.is_last_layer(): - hs_norm = self.norm(hidden_states) - hs_lm_head = self.full_model.lm_head(hs_norm).float() + position_ids = cache_position.unsqueeze(0) + + hidden_states = inputs_embeds + + # progress through layers + for decoder_layer in self.layers: + layer_outputs = decoder_layer( + hidden_states, + position_ids=position_ids, + past_key_value=past_kvs, + use_cache=use_cache, + cache_position=cache_position, + ) + + hidden_states = layer_outputs[0] + next_kvs = layer_outputs[1] + + if DEBUG >= 3: + print(f"hidden_state: {hidden_states}") + print(f"next_kvs: {next_kvs}") - # Use the sampling function with default settings + if self.shard.is_last_layer(): + norm = self.norm(hidden_states) + lm_head = self.lm_head(norm).float() + with torch.no_grad(): - output_token = sample_logits( - hs_lm_head[:, -1, :], + logits = sample_logits( + lm_head[:, -1, :], TEMP, TOP_P, TOP_K ).cpu().numpy().flatten() - if DEBUG >= 2: - print(f"hs_norm: {hs_norm}") - print(f"hs_lm_head: {hs_lm_head}") - print(f"output_token: {output_token}") + if DEBUG >= 3: + print( + self.tokenizer.batch_decode( + logits, + skip_special_tokens=True + )[0] + ) - return output_token - - return hidden_states.cpu().numpy() - - def forward_layers_cached( - self, - input_data: torch.tensor, - past_kvs: Cache = DynamicCache() - ) -> Tuple[np.ndarray, list]: - """ - Forward pass through the specified layers. 
- With caching - - Note: past_key_values not working for model, might be a library bug - """ - if DEBUG >= 2: - print("forward_layer call") - print(f"input_data: {input_data}") - print(f"shard {self.shard.to_dict()}") - - hidden_states = input_data - position_ids = None - position_embeddings = None + return (logits, next_kvs) - if self.shard.is_first_layer(): - hidden_states = self.embed_tokens(hidden_states) - - if DEBUG >= 2: - print(f"hidden_states: {hidden_states}") - print(f"hidden_states.size(): {hidden_states.size()}") - - batch_size, seq_len = input_data.size() - position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) - - position_embeddings = self.full_model.model.rotary_emb( - hidden_states, - position_ids - ) + return ( + hidden_states.cpu().numpy(), + next_kvs + ) - # if DEBUG >= 2: - # print(f"embedded hidden_states {hidden_states}") - # print(f"position_ids: {position_embeddings}") + # def forward_layers( + # self, + # input_data: torch.tensor + # ) -> np.ndarray: + # """ + # Forward pass through the specified layers. + # This is without caching + + # Note: past_key_values not working for model, might be a library bug + # """ + # if DEBUG >= 2: + # print("forward_layer call") + # print(f"input_data: {input_data}") + # print(f"shard {self.shard.to_dict()}") + + # hidden_states = input_data + + # # Forward pass through the layer + # if DEBUG >= 2: + # print(f"\n[layer model] {self.llm_model.model}") + # print(f"IN hidden_states {hidden_states}") + + # layer_outputs = self.llm_model.model( + # hidden_states.to(self.device), + # use_cache=False + # ) + # if DEBUG >= 2: + # print(f"OUT hidden_states {layer_outputs.last_hidden_state}") - # Forward pass through the layer - if DEBUG >= 2: - print(f"IN hidden_states {hidden_states}") - print(f"past_kvs {past_kvs}") + # hidden_states = layer_outputs.last_hidden_state + + # print(f"2 is_last_layer {self.shard.is_last_layer()}") + # if self.shard.is_last_layer(): + # hs_norm = self.norm(hidden_states) + # hs_lm_head = self.llm_model.lm_head(hs_norm).float() + + # # Use the sampling function with default settings + # with torch.no_grad(): + # output_token = sample_logits( + # hs_lm_head[:, -1, :], + # TEMP, + # TOP_P, + # TOP_K + # ).cpu().numpy().flatten() + + # if DEBUG >= 2: + # print(f"hs_norm: {hs_norm}") + # print(f"hs_lm_head: {hs_lm_head}") + # print(f"output_token: {output_token}") + + # return output_token - layer_outputs = self.full_model.model( - hidden_states, - position_ids=position_ids, - inputs_embeds=position_embeddings, - past_key_values=past_kvs, - use_cache=True - ) + # return hidden_states.cpu().numpy() + + # def forward_layers_cached( + # self, + # input_data: torch.tensor, + # past_kvs + # ) -> Tuple[np.ndarray, list]: + # """ + # Forward pass through the specified layers. 
+ # With caching + + # Note: past_key_values not working for model, might be a library bug + # """ + + # if not past_kvs: + # past_kvs = DynamicCache() + # else: + # past_kvs = DynamicCache.from_legacy_cache(past_kvs) + + # if DEBUG >= 2: + # print("forward_layer call") + # print(f"input_data: {input_data}") + # print(f"shard {self.shard.to_dict()}") + # print(f"past_kvs: {past_kvs}") + + # input_ids = input_data.to(self.device) + # position_ids = None + # # position_embeddings = None + + # inputs_embeds = self.embed_tokens(input_ids) + + # if self.shard.is_first_layer(): + # hidden_states = self.embed_tokens(hidden_states) + + # if DEBUG >= 2: + # print(f"hidden_states: {hidden_states}") + # print(f"hidden_states.size(): {hidden_states.size()}") + + # batch_size, seq_len = input_data.size() + # position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) + + # # check if model does not have rotary emb + # # have to apply rotary per model + # # embedding seems very model specific and using position_ids + # # seems more universal, even though some give warning about it + # # if re.match(r"Qwen|qwen", self.shard.model_id): + # # import transformers.models.qwen2.modeling_qwen2 as qwen2 + # # position_embeddings = + # # q=hidden_states, + # # position_ids=position_ids + # # ) + # # else: + # # position_embeddings = self.llm_model.model.rotary_emb( + # # hidden_states, + # # position_ids + # # ) + + # # if DEBUG >= 2: + # # print(f"embedded hidden_states {hidden_states}") + # # print(f"position_ids: {position_embeddings}") - if DEBUG >= 2: - print(f"\nlayer_outputs: {layer_outputs}") - hidden_states = layer_outputs.last_hidden_state - present_kvs = layer_outputs.past_key_values - - print(f"2 is_last_layer {self.shard.is_last_layer()}") - if self.shard.is_last_layer(): - hs_norm = self.norm(hidden_states) - hs_lm_head = self.full_model.lm_head(hs_norm).float() - - # Use the sampling function with default settings - output_token = sample_logits( - hs_lm_head[:, -1, :], - TEMP, - TOP_P, - TOP_K - ).numpy() - - if DEBUG >= 2: - print(f"hs_norm: {hs_norm}") - print(f"hs_lm_head: {hs_lm_head}") - print(f"output_token: {output_token}") - - return (output_token, present_kvs) + # # Forward pass through the layer + # if DEBUG >= 2: + # print(f"IN hidden_states {hidden_states}") + # print(f"past_kvs {past_kvs}") + + # layer_outputs = self.llm_model.model( + # hidden_states, + # position_ids=position_ids, + # past_key_values=past_kvs, + # use_cache=True + # ) + + # if DEBUG >= 2: + # print(f"\nlayer_outputs: {layer_outputs}") + + # hidden_states = layer_outputs.last_hidden_state + # present_kvs = layer_outputs.past_key_values + + # print(f"2 is_last_layer {self.shard.is_last_layer()}") + # if self.shard.is_last_layer(): + # hs_norm = self.norm(hidden_states) + # hs_lm_head = self.llm_model.lm_head(hs_norm).float() + + # # Use the sampling function with default settings + # with torch.no_grad(): + # output_token = sample_logits( + # hs_lm_head[:, -1, :], + # TEMP, + # TOP_P, + # TOP_K + # ).cpu().numpy().flatten() + + # if DEBUG >= 2: + # print(f"hs_norm: {hs_norm}") + # print(f"hs_lm_head: {hs_lm_head}") + # print(f"output_token: {output_token}") + + # return (output_token, present_kvs) - return (hidden_states.numpy(), present_kvs) \ No newline at end of file + # return (hidden_states.cpu().numpy(), present_kvs) \ No newline at end of file From 3beea222d43bdce04b448abc43e7bd2ddcb61a6d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 
03:08:47 -0800 Subject: [PATCH 305/589] updates to caching, stuck on issue with infer_prompt and infer_tensor where data from infer_prompt is not complete --- exo/inference/pytorch/inference.py | 56 ++++++++++++------ exo/inference/pytorch/model/hf.py | 91 ++++++++++++++++++------------ 2 files changed, 93 insertions(+), 54 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 308421743..95d71f333 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -12,8 +12,6 @@ from exo.helpers import DEBUG from transformers import DynamicCache -from exo.inference.pytorch.model.utils import sample_logits - class PyTorchDynamicShardInferenceEngine(InferenceEngine): """ PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. @@ -50,15 +48,24 @@ async def infer_prompt( # need to make this so inference_state is not a string # cant use it with dynamic cache - tokens = self.tokenizer.encode(prompt, return_tensors="pt") + tokens = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) + tokens = self.model.embed_tokens(tokens) + current_kvs = None + if DEBUG >= 4: + print("infer_prompt called") + print(f"tokens: {tokens}\n") + print(f"layer_count: {self.shard.get_layer_count()}") + print(f"is_first_layer: {self.shard.is_first_layer()}") + print(f"is_last_layer: {self.shard.is_last_layer()}") + if self.use_cache: # convert inference_state or cache from json to DynamicCache past_kv = DynamicCache() if inference_state != None: cache_dict = json.loads(inference_state) - past_kv.key_cache = [torch.tensor(data) for data in cache_dict['key_cache']] - past_kv.value_cache = [torch.tensor(data) for data in cache_dict['value_cache']] + past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] + past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] output_data, current_kvs = self.model.forward( tokens, @@ -74,8 +81,6 @@ async def infer_prompt( is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 4: - print("infer_prompt called") - print(f"tokens: {tokens}\n") print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") @@ -88,8 +93,6 @@ async def infer_prompt( print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") if self.use_cache: - # legacy_cache = current_kvs.to_legacy_cache() - print(current_kvs.key_cache) cache_dict = { 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] @@ -111,15 +114,35 @@ async def infer_tensor( await self.ensure_shard(shard) - in_tensor = torch.tensor(input_data) + current_kvs = None + + in_tensor = torch.tensor( + input_data, + device=self.device + ) + + if in_tensor.dim() == 1: + in_tensor = in_tensor.unsqueeze(1) + + in_tensor = self.model.embed_tokens(in_tensor) + if DEBUG >= 4: + print("infer_tensor called") + print(f"input_data: {input_data}") + print(f"input_data.size: {input_data.size}") + print(f"input_tensor: {in_tensor}\n") + print(f"shard: {self.shard}") + print(f"layer_count: {self.shard.get_layer_count()}") + print(f"is_first_layer: {self.shard.is_first_layer()}") + print(f"is_last_layer: {self.shard.is_last_layer()}") + if self.use_cache: # convert inference_state or cache from json to DynamicCache past_kv = DynamicCache() if inference_state != 
None: cache_dict = json.loads(inference_state) - past_kv.key_cache = [torch.tensor(data) for data in cache_dict['key_cache']] - past_kv.value_cache = [torch.tensor(data) for data in cache_dict['value_cache']] + past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] + past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] output_data, current_kvs = self.model.forward( in_tensor, @@ -135,8 +158,6 @@ async def infer_tensor( is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] if DEBUG >= 4: - print("infer_tensor called") - print(f"input_data: {input_data}\n") print(f"in_tensor: {in_tensor}\n") print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") @@ -148,11 +169,10 @@ async def infer_tensor( print(f"size 1 output_data.item() {output_data.item()}") print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") - if self.use_cache: - legacy_cache = current_kvs.to_legacy_cache() + if self.use_cache and current_kvs: cache_dict = { - 'key_cache': [tensor.tolist() for tensor in legacy_cache.key_cache], - 'value_cache': [tensor.tolist() for tensor in legacy_cache.value_cache] + 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], + 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] } return ( diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index a83454663..fd907b245 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -8,7 +8,7 @@ from exo.helpers import DEBUG from typing import Tuple, Optional, Union, List -from .utils import sample_logits +from exo.inference.pytorch.model.utils import sample_logits TOP_P = 0.75 #0.95 TOP_K = 20 @@ -30,7 +30,7 @@ def __init__(self, shard: Shard, tokenizer: any): try: self.llm_model = AutoModelForCausalLM.from_pretrained( shard.model_id, - torch_dtype="auto", + torch_dtype=torch.float32, device_map="auto", # offload_buffers=True ) @@ -64,6 +64,7 @@ def __init__(self, shard: Shard, tokenizer: any): # used for doing what forward LlamaModel does in transformers self.norm = self.llm_model.model.norm self.lm_head = self.llm_model.lm_head + self.embed_tokens = self.base_model.embed_tokens def forward( self, @@ -88,65 +89,83 @@ def forward( https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/qwen2/modeling_qwen2.py#L804 https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/llama/modeling_llama.py#L887 """ - - if self.shard.is_first_layer(): - inputs_embeds = self.base_model.embed_tokens(input_ids.to(self.device)) - - if use_cache: - past_kvs = DynamicCache.from_legacy_cache(past_kvs) - + if DEBUG >= 4: + print("forward called") + print(f"input_ids: {input_ids}\n") + print(f"layer_count: {self.shard.get_layer_count()}") + print(f"is_first_layer: {self.shard.is_first_layer()}") + print(f"is_last_layer: {self.shard.is_last_layer()}") + + if use_cache: + past_kvs = DynamicCache.from_legacy_cache(past_kvs) past_seen_tokens = past_kvs.get_seq_length() if past_kvs is not None else 0 - cache_position = torch.arange( - past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device - ) + + # if self.shard.is_first_layer(): + # inputs_embeds = self.embed_tokens(input_ids) + + # cache_position = torch.arange( + # past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], 
device=inputs_embeds.device + # ).to(self.device) - position_ids = cache_position.unsqueeze(0) + # position_ids = cache_position.unsqueeze(0).to(self.device) - hidden_states = inputs_embeds + # hidden_states = inputs_embeds # progress through layers for decoder_layer in self.layers: + if DEBUG >= 4: + print("Going through layer") + print(f"{decoder_layer}") + layer_outputs = decoder_layer( - hidden_states, - position_ids=position_ids, + input_ids, + # position_ids=position_ids, past_key_value=past_kvs, use_cache=use_cache, - cache_position=cache_position, + # cache_position=cache_position, ) hidden_states = layer_outputs[0] - next_kvs = layer_outputs[1] + if use_cache: + next_kvs = layer_outputs[1] if DEBUG >= 3: print(f"hidden_state: {hidden_states}") print(f"next_kvs: {next_kvs}") - + if self.shard.is_last_layer(): - norm = self.norm(hidden_states) - lm_head = self.lm_head(norm).float() - + hs_norm = self.norm(hidden_states) + hs_lm_head = self.llm_model.lm_head(hs_norm).float() + + # Use the sampling function with default settings with torch.no_grad(): - logits = sample_logits( - lm_head[:, -1, :], + output_token = sample_logits( + hs_lm_head[:, -1, :], TEMP, TOP_P, TOP_K ).cpu().numpy().flatten() - if DEBUG >= 3: - print( - self.tokenizer.batch_decode( - logits, - skip_special_tokens=True - )[0] - ) + if DEBUG >= 2: + print(f"hs_norm: {hs_norm}") + print(f"hs_lm_head: {hs_lm_head}") + print(f"output_token: {output_token}") - return (logits, next_kvs) + if use_cache: + return (output_token, next_kvs) + + return output_token + + with torch.no_grad(): + out_hidden_states = hidden_states.cpu().numpy() - return ( - hidden_states.cpu().numpy(), - next_kvs - ) + if use_cache: + return ( + out_hidden_states, + next_kvs + ) + + return out_hidden_states # def forward_layers( # self, From 87a14ca7be29768d498290b48c5de0fb76cc61a4 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 03:23:35 -0800 Subject: [PATCH 306/589] trying to fix infer problems --- exo/inference/pytorch/inference.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 95d71f333..ad6e8f3a5 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -121,10 +121,10 @@ async def infer_tensor( device=self.device ) - if in_tensor.dim() == 1: - in_tensor = in_tensor.unsqueeze(1) + # if in_tensor.dim() == 1: + # in_tensor = in_tensor.unsqueeze(1) - in_tensor = self.model.embed_tokens(in_tensor) + # in_tensor = self.model.embed_tokens(in_tensor) if DEBUG >= 4: print("infer_tensor called") From 356bf2f56dfc984406bb80c4dfc8fac11c644e64 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 13:53:38 -0800 Subject: [PATCH 307/589] switched everything to use caching, did more prep for encoding the token/logit coming from infer_tensor to infer_prompt, running into OOM issues trying on server --- exo/inference/pytorch/inference.py | 103 ++++++++---------- exo/inference/pytorch/model/hf.py | 48 +++----- .../pytorch/test_inference_engine.py | 86 +++++++++------ 3 files changed, 116 insertions(+), 121 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index ad6e8f3a5..200b1c4c7 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -12,6 +12,8 @@ from exo.helpers import DEBUG from transformers import DynamicCache + + class PyTorchDynamicShardInferenceEngine(InferenceEngine): """ PyTorch Dynamic Shard 
Inference Engine for performing model inference with sharded models. @@ -29,11 +31,6 @@ def __init__(self, shard): self.tokenizer = None self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - if os.getenv("TORCH_CACHED") == "True": - self.use_cache = True - else: - self.use_cache = False - async def infer_prompt( self, request_id: str, @@ -59,24 +56,17 @@ async def infer_prompt( print(f"is_first_layer: {self.shard.is_first_layer()}") print(f"is_last_layer: {self.shard.is_last_layer()}") - if self.use_cache: - # convert inference_state or cache from json to DynamicCache - past_kv = DynamicCache() - if inference_state != None: - cache_dict = json.loads(inference_state) - past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] - past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] - - output_data, current_kvs = self.model.forward( - tokens, - past_kv, - use_cache=True - ) - else: - output_data = self.model.forward( - tokens, - use_cache=False - ) + # convert inference_state or cache from json to DynamicCache + past_kv = DynamicCache() + if inference_state != None: + cache_dict = json.loads(inference_state) + past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] + past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] + + output_data, current_kvs = self.model.forward( + tokens, + past_kv + ) is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] @@ -92,15 +82,14 @@ async def infer_prompt( print(f"size 1 output_data.item() {output_data.item()}") print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") - if self.use_cache: - cache_dict = { - 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], - 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] - } + cache_dict = { + 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], + 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] + } return ( output_data, - json.dumps(cache_dict) if self.use_cache else "", + json.dumps(cache_dict), is_finished ) @@ -116,15 +105,13 @@ async def infer_tensor( current_kvs = None - in_tensor = torch.tensor( - input_data, - device=self.device - ) - - # if in_tensor.dim() == 1: - # in_tensor = in_tensor.unsqueeze(1) + if input_data.size == 1: + in_tensor = torch.tensor( + input_data, + device=self.device + ).unsqueeze(0).long() - # in_tensor = self.model.embed_tokens(in_tensor) + in_tensor = self.model.embed_tokens(in_tensor) if DEBUG >= 4: print("infer_tensor called") @@ -136,24 +123,26 @@ async def infer_tensor( print(f"is_first_layer: {self.shard.is_first_layer()}") print(f"is_last_layer: {self.shard.is_last_layer()}") - if self.use_cache: - # convert inference_state or cache from json to DynamicCache - past_kv = DynamicCache() - if inference_state != None: + # convert inference_state or cache from json to DynamicCache + past_kv = DynamicCache() + if inference_state != None: + try: cache_dict = json.loads(inference_state) past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] - output_data, current_kvs = self.model.forward( - in_tensor, - past_kv, - use_cache=True - ) - else: - output_data = self.model.forward( - in_tensor, - use_cache=False - ) 
+ if DEBUG >= 4: + print("Loaded past_kv from JSON") + print(f"past_kv: {past_kv}") + print(f"past_kv.key_cache len: {len(past_kv.key_cache)}") + print(f"past_kv.value_cache len: {len(past_kv.value_cache)}") + except json.JSONDecodeError: + print(f"ERROR DECODING INFERENCE STATE") + + output_data, current_kvs = self.model.forward( + in_tensor, + past_kv + ) is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] @@ -169,15 +158,15 @@ async def infer_tensor( print(f"size 1 output_data.item() {output_data.item()}") print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") - if self.use_cache and current_kvs: - cache_dict = { - 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], - 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] - } + + cache_dict = { + 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], + 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] + } return ( output_data, - json.dumps(cache_dict) if self.use_cache else "", + json.dumps(cache_dict), is_finished ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index fd907b245..cdb7d7c07 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -55,7 +55,7 @@ def __init__(self, shard: Shard, tokenizer: any): layers.append(layer) - self.layers = nn.ModuleList(layers).to(self.device) + self.layers = nn.ModuleList(layers) if DEBUG >= 2: print(f"full_model.model layer: {len(self.llm_model.model.layers)}") @@ -70,7 +70,6 @@ def forward( self, input_ids: torch.tensor, past_kvs: Optional[Union[Cache, List[torch.FloatTensor]]] = None, - use_cache: bool = True ) -> Tuple[np.ndarray, any]: """ Forward through layers using the base model @@ -96,20 +95,16 @@ def forward( print(f"is_first_layer: {self.shard.is_first_layer()}") print(f"is_last_layer: {self.shard.is_last_layer()}") - if use_cache: - past_kvs = DynamicCache.from_legacy_cache(past_kvs) - past_seen_tokens = past_kvs.get_seq_length() if past_kvs is not None else 0 - - # if self.shard.is_first_layer(): - # inputs_embeds = self.embed_tokens(input_ids) - - # cache_position = torch.arange( - # past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device - # ).to(self.device) + past_kvs = DynamicCache.from_legacy_cache(past_kvs) + past_seen_tokens = past_kvs.get_seq_length() if past_kvs is not None else 0 - # position_ids = cache_position.unsqueeze(0).to(self.device) + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + input_ids.shape[1], + device=input_ids.device + ).to(self.device) - # hidden_states = inputs_embeds + position_ids = cache_position.unsqueeze(0).to(self.device) # progress through layers for decoder_layer in self.layers: @@ -119,15 +114,14 @@ def forward( layer_outputs = decoder_layer( input_ids, - # position_ids=position_ids, + position_ids=position_ids, past_key_value=past_kvs, - use_cache=use_cache, - # cache_position=cache_position, + use_cache=True, + cache_position=cache_position, ) hidden_states = layer_outputs[0] - if use_cache: - next_kvs = layer_outputs[1] + next_kvs = layer_outputs[1] if DEBUG >= 3: print(f"hidden_state: {hidden_states}") @@ -151,21 +145,15 @@ def forward( print(f"hs_lm_head: {hs_lm_head}") print(f"output_token: {output_token}") - if use_cache: - return (output_token, next_kvs) - - return output_token + return (output_token, next_kvs) with torch.no_grad(): 
out_hidden_states = hidden_states.cpu().numpy() - if use_cache: - return ( - out_hidden_states, - next_kvs - ) - - return out_hidden_states + return ( + out_hidden_states, + next_kvs + ) # def forward_layers( # self, diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index ffb5a10fb..725130e2e 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -9,33 +9,68 @@ import os import numpy as np -async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str): +async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str, n_layers: int): + # prompt = "Why is the sky blue?" prompt = "In a single word only, what is the last name of the current president of the USA?" - resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt("A", shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), prompt=prompt) + + shard = Shard( + model_id=model_id, + start_layer=0, + end_layer=n_layers-1, + n_layers=n_layers + ) + + resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( + "A", + shard=shard, + prompt=prompt + ) + next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( "A", - shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), + shard=shard, input_data=resp_full, inference_state=inference_state_full, ) - pp = 15 - resp1, inference_state_1, _ = await inference_engine_1.infer_prompt("B", shard=Shard(model_id=model_id, start_layer=0, end_layer=pp, n_layers=32), prompt=prompt) + pp = int(n_layers/2) + resp_shard = Shard( + model_id=model_id, + start_layer=0, + end_layer=pp, + n_layers=n_layers + ) + + resp_shard2 = Shard( + model_id=model_id, + start_layer=pp + 1, + end_layer=n_layers-1, + n_layers=n_layers + ) + + resp1, inference_state_1, _ = await inference_engine_1.infer_prompt( + "B", + shard=resp_shard, + prompt=prompt + ) + resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( "B", - shard=Shard(model_id=model_id, start_layer=pp + 1, end_layer=31, n_layers=32), + shard=resp_shard2, input_data=resp1, inference_state=inference_state_1, ) + resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( "B", - shard=Shard(model_id=model_id, start_layer=0, end_layer=pp, n_layers=32), + shard=resp_shard, input_data=resp2, inference_state=inference_state_2, ) + resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( "B", - shard=Shard(model_id=model_id, start_layer=pp + 1, end_layer=31, n_layers=32), + shard=resp_shard2, input_data=resp3, inference_state=inference_state_3, ) @@ -43,35 +78,18 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e assert np.array_equal(resp_full, resp2) assert np.array_equal(next_resp_full, resp4) -def single_test(): - shard = Shard( - model_id="meta-llama/Meta-Llama-3.1-8B", - start_layer=0, - end_layer=0, - n_layers=32 - ) - - engine = PyTorchDynamicShardInferenceEngine(shard) - - - # Prepare the prompt - prompt = "Why is the sky blue?" 
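# The inference_state string the test above passes between engines is a
# JSON-serialised DynamicCache. A self-contained sketch of that round-trip,
# mirroring the infer_prompt/infer_tensor hunks in this patch; assigning
# key_cache/value_cache directly relies on transformers' DynamicCache
# internals, so treat it as illustrative rather than the library API.
import json
import torch
from transformers import DynamicCache

def cache_to_json(cache: DynamicCache) -> str:
    return json.dumps({
        "key_cache": [t.tolist() for t in cache.key_cache],
        "value_cache": [t.tolist() for t in cache.value_cache],
    })

def cache_from_json(state: str, device: str = "cpu") -> DynamicCache:
    cache = DynamicCache()
    if state:
        d = json.loads(state)
        cache.key_cache = [torch.tensor(t, device=device) for t in d["key_cache"]]
        cache.value_cache = [torch.tensor(t, device=device) for t in d["value_cache"]]
    return cache

# round-trip a dummy single-layer cache: batch=1, heads=2, seq=3, head_dim=4
cache = DynamicCache()
cache.update(torch.rand(1, 2, 3, 4), torch.rand(1, 2, 3, 4), layer_idx=0)
restored = cache_from_json(cache_to_json(cache))
print(restored.get_seq_length())  # 3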
- - # Run inference - loop = asyncio.get_event_loop() - output_data, _, _ = loop.run_until_complete( - engine.infer_prompt( - request_id="test_request", shard=shard, prompt=prompt - ) - ) - - assert output_data is not None - if __name__ == '__main__': - # single_test() + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "Qwen/Qwen2-0.5B-Instruct", + # 25 + # )) + asyncio.run(test_inference_engine( PyTorchDynamicShardInferenceEngine(HFShardDownloader()), PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "andrijdavid/Llama3-2B-Base", + "andrijdavid/Llama3-1B-Base", + 3 )) From aa8903285006c8c75c15b774373ccf38786dfc32 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 14:03:41 -0800 Subject: [PATCH 308/589] fixing test --- exo/inference/pytorch/test_inference_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 725130e2e..7b8396529 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -1,7 +1,7 @@ import asyncio from exo.inference.shard import Shard -from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine +from .inference import PyTorchDynamicShardInferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.inference_engine import InferenceEngine from exo.inference.shard import Shard From b9331d70319a0b928e8877b231798692538c4899 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 14:05:01 -0800 Subject: [PATCH 309/589] adding init py for old python versions --- exo/inference/pytorch/__init__.py | 0 exo/inference/pytorch/model/__init__.py | 0 exo/inference/pytorch/test_inference_engine.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 exo/inference/pytorch/__init__.py create mode 100644 exo/inference/pytorch/model/__init__.py diff --git a/exo/inference/pytorch/__init__.py b/exo/inference/pytorch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/exo/inference/pytorch/model/__init__.py b/exo/inference/pytorch/model/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 7b8396529..725130e2e 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -1,7 +1,7 @@ import asyncio from exo.inference.shard import Shard -from .inference import PyTorchDynamicShardInferenceEngine +from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.inference_engine import InferenceEngine from exo.inference.shard import Shard From 2c7aa9c7b818b23cbbfaa30f6108eeb515904f90 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 14:59:59 -0800 Subject: [PATCH 310/589] update readme and add in init pys --- exo/download/__init__.py | 0 exo/download/hf/__init__.py | 0 exo/inference/pytorch/README.md | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 exo/download/__init__.py create mode 100644 exo/download/hf/__init__.py diff --git a/exo/download/__init__.py b/exo/download/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/exo/download/hf/__init__.py 
b/exo/download/hf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/exo/inference/pytorch/README.md b/exo/inference/pytorch/README.md index 8cb0ce076..f87ee3898 100644 --- a/exo/inference/pytorch/README.md +++ b/exo/inference/pytorch/README.md @@ -6,7 +6,7 @@ Experimental, still under development Install needed py modules, make sure to be using CUDA 12.4 for the PyTorch install ```console -$ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 +$ pip install torch --index-url https://download.pytorch.org/whl/cu124 $ pip install transformers accelerate ``` From 6da3e942173fc20778fae2ca9aaeb4bd97d567c0 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 15:04:24 -0800 Subject: [PATCH 311/589] adding more tests --- .../pytorch/test_inference_engine.py | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 725130e2e..d12aaf01a 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -79,13 +79,15 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e assert np.array_equal(next_resp_full, resp4) if __name__ == '__main__': - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "Qwen/Qwen2-0.5B-Instruct", - # 25 - # )) + print(f"\n\n -------- TEST QWEN2 -------- \n\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "Qwen/Qwen2-0.5B-Instruct", + 25 + )) + print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") asyncio.run(test_inference_engine( PyTorchDynamicShardInferenceEngine(HFShardDownloader()), PyTorchDynamicShardInferenceEngine(HFShardDownloader()), @@ -93,3 +95,11 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e 3 )) + print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "meta-llama/Meta-Llama-3.1-8B", + 32 + )) + From d0bc93c1471e28d509c9e9836953b65b9cc9c8e7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 15:07:28 -0800 Subject: [PATCH 312/589] adding more try catch to move through tests --- .../pytorch/test_inference_engine.py | 53 +++++++++++-------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index d12aaf01a..e540516c5 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -80,26 +80,35 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e if __name__ == '__main__': print(f"\n\n -------- TEST QWEN2 -------- \n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "Qwen/Qwen2-0.5B-Instruct", - 25 - )) - - print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "andrijdavid/Llama3-1B-Base", - 3 - )) 
- - print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "meta-llama/Meta-Llama-3.1-8B", - 32 - )) + try: + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "Qwen/Qwen2-0.5B-Instruct", + 24 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") + + try: + print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "andrijdavid/Llama3-1B-Base", + 3 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") + + try: + print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "meta-llama/Meta-Llama-3.1-8B", + 32 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") From 0e221b27f8947074d9ccda641272f10d6c297543 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 15:12:52 -0800 Subject: [PATCH 313/589] tests --- .../pytorch/test_inference_engine.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index e540516c5..c71d3070d 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -80,26 +80,26 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e if __name__ == '__main__': print(f"\n\n -------- TEST QWEN2 -------- \n\n") - try: - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "Qwen/Qwen2-0.5B-Instruct", - 24 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") - - try: - print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "andrijdavid/Llama3-1B-Base", - 3 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") + # try: + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "Qwen/Qwen2-0.5B-Instruct", + # 24 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") + + # try: + # print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "andrijdavid/Llama3-1B-Base", + # 3 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! 
LLAMA3-1B-Base TEST FAILED \n{err}\n") try: print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") From 9fc9fdb166dfe7cf6b4bd794a190b03ef3873cee Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 15:25:43 -0800 Subject: [PATCH 314/589] added position embeddings, update test --- exo/inference/pytorch/model/hf.py | 33 ++++++++++++------- .../pytorch/test_inference_engine.py | 6 +++- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index cdb7d7c07..484e6c4df 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -43,12 +43,12 @@ def __init__(self, shard: Shard, tokenizer: any): if DEBUG >= 2: print(f"\nShardedHuggingFaceModel init with shard {shard}") print(f"self.llm_model: {self.llm_model}") - print(f"self.llm_model.model: {self.llm_model.model}") + print(f"self.base_model: {self.base_model}") # load layers from base model to use layers = [] for i in range(shard.start_layer, shard.end_layer + 1): - layer = self.llm_model.model.layers[i] + layer = self.base_model.layers[i] if DEBUG >= 2: print(f"Loading layers[{i}]") @@ -58,11 +58,11 @@ def __init__(self, shard: Shard, tokenizer: any): self.layers = nn.ModuleList(layers) if DEBUG >= 2: - print(f"full_model.model layer: {len(self.llm_model.model.layers)}") + print(f"full_model.model layer: {len(self.base_model.layers)}") # Embeddings and final layer norm # used for doing what forward LlamaModel does in transformers - self.norm = self.llm_model.model.norm + self.norm = self.base_model.norm self.lm_head = self.llm_model.lm_head self.embed_tokens = self.base_model.embed_tokens @@ -106,6 +106,15 @@ def forward( position_ids = cache_position.unsqueeze(0).to(self.device) + try: + position_embeddings = self.base_model.rotary_emb( + input_ids, + position_ids + ) + except Exception as err: + print(f"rotary_emb not found in base_model") + position_embeddings = None + # progress through layers for decoder_layer in self.layers: if DEBUG >= 4: @@ -114,7 +123,8 @@ def forward( layer_outputs = decoder_layer( input_ids, - position_ids=position_ids, + position_ids=position_ids if not position_embeddings else None, + position_embeddings=position_embeddings, past_key_value=past_kvs, use_cache=True, cache_position=cache_position, @@ -124,8 +134,7 @@ def forward( next_kvs = layer_outputs[1] if DEBUG >= 3: - print(f"hidden_state: {hidden_states}") - print(f"next_kvs: {next_kvs}") + print(f"layer_outputs {layer_outputs}") if self.shard.is_last_layer(): hs_norm = self.norm(hidden_states) @@ -138,7 +147,7 @@ def forward( TEMP, TOP_P, TOP_K - ).cpu().numpy().flatten() + ).numpy(force=True).flatten() if DEBUG >= 2: print(f"hs_norm: {hs_norm}") @@ -174,10 +183,10 @@ def forward( # # Forward pass through the layer # if DEBUG >= 2: - # print(f"\n[layer model] {self.llm_model.model}") + # print(f"\n[layer model] {self.base_model}") # print(f"IN hidden_states {hidden_states}") - # layer_outputs = self.llm_model.model( + # layer_outputs = self.base_model( # hidden_states.to(self.device), # use_cache=False # ) @@ -260,7 +269,7 @@ def forward( # # position_ids=position_ids # # ) # # else: - # # position_embeddings = self.llm_model.model.rotary_emb( + # # position_embeddings = self.base_model.rotary_emb( # # hidden_states, # # position_ids # # ) @@ -275,7 +284,7 @@ def forward( # print(f"IN hidden_states {hidden_states}") # print(f"past_kvs {past_kvs}") - # layer_outputs = self.llm_model.model( + # layer_outputs = self.base_model( # 
hidden_states, # position_ids=position_ids, # past_key_values=past_kvs, diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index c71d3070d..e4e0e0785 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -26,6 +26,8 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e prompt=prompt ) + print(f"resp_full: {resp_full}") + next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( "A", shard=shard, @@ -33,6 +35,8 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e inference_state=inference_state_full, ) + print(f"next_resp_full: {next_resp_full}") + pp = int(n_layers/2) resp_shard = Shard( model_id=model_id, @@ -79,8 +83,8 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e assert np.array_equal(next_resp_full, resp4) if __name__ == '__main__': - print(f"\n\n -------- TEST QWEN2 -------- \n\n") # try: + # print(f"\n\n -------- TEST QWEN2 -------- \n\n") # asyncio.run(test_inference_engine( # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), From 2635b4c7218c650e93289ab6dcc90d43af2e2d17 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 15:30:57 -0800 Subject: [PATCH 315/589] tests --- .../pytorch/test_inference_engine.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index e4e0e0785..15337d53f 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -94,25 +94,25 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e # except Exception as err: # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") - # try: - # print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "andrijdavid/Llama3-1B-Base", - # 3 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") - try: - print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") + print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") asyncio.run(test_inference_engine( PyTorchDynamicShardInferenceEngine(HFShardDownloader()), PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "meta-llama/Meta-Llama-3.1-8B", - 32 + "andrijdavid/Llama3-1B-Base", + 3 )) except Exception as err: - print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") + print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") + + # try: + # print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "meta-llama/Meta-Llama-3.1-8B", + # 32 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! 
META LLAMA 3.1 8B TEST FAILED \n{err}\n") From 86e89eb8ddf2ac2933a317a4e688b2c59449ca1d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 26 Aug 2024 18:35:32 -0800 Subject: [PATCH 316/589] adding back tests --- .../pytorch/test_inference_engine.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 15337d53f..b690f02e5 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -83,16 +83,16 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e assert np.array_equal(next_resp_full, resp4) if __name__ == '__main__': - # try: - # print(f"\n\n -------- TEST QWEN2 -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "Qwen/Qwen2-0.5B-Instruct", - # 24 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") + try: + print(f"\n\n -------- TEST QWEN2 -------- \n\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "Qwen/Qwen2-0.5B-Instruct", + 24 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") try: print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") @@ -105,14 +105,14 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e except Exception as err: print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") - # try: - # print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "meta-llama/Meta-Llama-3.1-8B", - # 32 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") + try: + print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "meta-llama/Meta-Llama-3.1-8B", + 32 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") From 64fbacd6af05434d434bc5362d19602d9e88fe0b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 27 Aug 2024 05:46:36 -0800 Subject: [PATCH 317/589] adding another test --- .../pytorch/test_inference_engine.py | 61 +++++++++++-------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index b690f02e5..8d02e6343 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -83,36 +83,47 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e assert np.array_equal(next_resp_full, resp4) if __name__ == '__main__': - try: - print(f"\n\n -------- TEST QWEN2 -------- \n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "Qwen/Qwen2-0.5B-Instruct", - 24 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! 
QWEN2 TEST FAILED \n{err}\n") + # try: + # print(f"\n\n -------- TEST QWEN2 -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "Qwen/Qwen2-0.5B-Instruct", + # 24 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") + + # try: + # print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "andrijdavid/Llama3-1B-Base", + # 3 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") + + # try: + # print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "meta-llama/Meta-Llama-3.1-8B", + # 32 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") try: - print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") + print(f"\n\n ------- TEST Chickaboo/ChickaQ-Large -----\n\n") asyncio.run(test_inference_engine( PyTorchDynamicShardInferenceEngine(HFShardDownloader()), PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "andrijdavid/Llama3-1B-Base", - 3 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") - - try: - print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "meta-llama/Meta-Llama-3.1-8B", - 32 + "Chickaboo/ChickaQ-Large", + 24 )) except Exception as err: - print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") + print(f"\n\n !!!!!!!!!!! Chickaboo/ChickaQ-Large TEST FAILED \n{err}\n") From 0d9313016f2a3d7d5b3cdc3354888caf632ae116 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 27 Aug 2024 08:07:05 -0800 Subject: [PATCH 318/589] added gc collect to remove gpu, fixed tokenizers warning --- .gitignore | 3 + exo/inference/pytorch/README.md | 10 +- exo/inference/pytorch/inference.py | 30 +++-- exo/inference/pytorch/model/hf.py | 186 ++--------------------------- exo/inference/tokenizers.py | 6 +- 5 files changed, 45 insertions(+), 190 deletions(-) diff --git a/.gitignore b/.gitignore index 44892139c..f5609f311 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,6 @@ cython_debug/ #.idea/ **/*.xcodeproj/* + +# PyTorch interface +.offload diff --git a/exo/inference/pytorch/README.md b/exo/inference/pytorch/README.md index f87ee3898..670c8df63 100644 --- a/exo/inference/pytorch/README.md +++ b/exo/inference/pytorch/README.md @@ -15,4 +15,12 @@ After installing accelerate you get hit with a dependency error, for now ignore ```console ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. exo 0.0.1 requires numpy==2.0.0, but you have numpy 1.26.4 which is incompatible. 
-``` \ No newline at end of file +``` + +## Low VRAM Notes + +- When trying to do disk_offload getting the error "Cannot copy out of meta tensor; no data!", looking up the error it is tied to (low vram)[https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/13087#issuecomment-2080272004] + +## Multiple GPU in 1 Notes +### Running multiple GPUs on 1 machine +- Getting error "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1! (when checking argument for argument tensors in method wrapper_CUDA_cat)" diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 200b1c4c7..014b71691 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,18 +1,16 @@ # experimental, based off of tinygrad/inference.py -import os import numpy as np import torch import numpy as np import json -from typing import Optional, Callable, Tuple +from typing import Optional, Tuple from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel from exo.api.chatgpt_api import resolve_tokenizer from exo.helpers import DEBUG from transformers import DynamicCache - - +from accelerate import disk_offload class PyTorchDynamicShardInferenceEngine(InferenceEngine): """ @@ -183,14 +181,24 @@ async def ensure_shard(self, shard: Optional[Shard]): if DEBUG >= 4: print(f"Loading new shard: {shard}") - # if self.model: - # if DEBUG >= 2: - # print(f"\nCLEARING MODEL {self.shard.model_id}\n") + if self.model: + if DEBUG >= 2: + print(f"\nCLEARING MODEL {shard.model_id}\n") + print(f"before allocated: {torch.cuda.memory_allocated()}") + print(f"before reserved: {torch.cuda.memory_reserved()}") - # # delete model and free up memory to reload - # self.model.cpu() - # del self.model - # torch.cuda.empty_cache() + # delete model and free up memory to reload + # self.model.cuda() + # disk_offload(model=self.model, offload_dir="./.offload") + import gc + + del self.model + gc.collect() + torch.cuda.empty_cache() + + if DEBUG >= 2: + print(f"after allocated: {torch.cuda.memory_allocated()}") + print(f"after reserved: {torch.cuda.memory_reserved()}") self.shard = shard self.tokenizer = await resolve_tokenizer(shard.model_id) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 484e6c4df..9d8990d7a 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,13 +1,9 @@ import torch -import torch.nn as nn import numpy as np -import re - -from transformers import AutoModelForCausalLM, BitsAndBytesConfig, DynamicCache, Cache +from transformers import AutoModelForCausalLM, DynamicCache, Cache from exo.inference.shard import Shard from exo.helpers import DEBUG from typing import Tuple, Optional, Union, List - from exo.inference.pytorch.model.utils import sample_logits TOP_P = 0.75 #0.95 @@ -32,9 +28,11 @@ def __init__(self, shard: Shard, tokenizer: any): shard.model_id, torch_dtype=torch.float32, device_map="auto", - # offload_buffers=True + offload_buffers=True ) + # disk_offload(model=self.llm_model, offload_dir="./.offload") + self.base_model = self.llm_model.model except Exception as err: print(f"Error loading model: {err}") @@ -45,18 +43,6 @@ def __init__(self, shard: Shard, tokenizer: any): print(f"self.llm_model: {self.llm_model}") print(f"self.base_model: {self.base_model}") - # load layers from base model to use - layers = [] - for i in 
range(shard.start_layer, shard.end_layer + 1): - layer = self.base_model.layers[i] - - if DEBUG >= 2: - print(f"Loading layers[{i}]") - - layers.append(layer) - - self.layers = nn.ModuleList(layers) - if DEBUG >= 2: print(f"full_model.model layer: {len(self.base_model.layers)}") @@ -116,7 +102,9 @@ def forward( position_embeddings = None # progress through layers - for decoder_layer in self.layers: + for i in range(self.shard.start_layer, self.shard.end_layer + 1): + decoder_layer = self.base_model.layers[i] + if DEBUG >= 4: print("Going through layer") print(f"{decoder_layer}") @@ -157,165 +145,9 @@ def forward( return (output_token, next_kvs) with torch.no_grad(): - out_hidden_states = hidden_states.cpu().numpy() + out_hidden_states = hidden_states.numpy(force=True) return ( out_hidden_states, next_kvs - ) - - # def forward_layers( - # self, - # input_data: torch.tensor - # ) -> np.ndarray: - # """ - # Forward pass through the specified layers. - # This is without caching - - # Note: past_key_values not working for model, might be a library bug - # """ - # if DEBUG >= 2: - # print("forward_layer call") - # print(f"input_data: {input_data}") - # print(f"shard {self.shard.to_dict()}") - - # hidden_states = input_data - - # # Forward pass through the layer - # if DEBUG >= 2: - # print(f"\n[layer model] {self.base_model}") - # print(f"IN hidden_states {hidden_states}") - - # layer_outputs = self.base_model( - # hidden_states.to(self.device), - # use_cache=False - # ) - - # if DEBUG >= 2: - # print(f"OUT hidden_states {layer_outputs.last_hidden_state}") - - # hidden_states = layer_outputs.last_hidden_state - - # print(f"2 is_last_layer {self.shard.is_last_layer()}") - # if self.shard.is_last_layer(): - # hs_norm = self.norm(hidden_states) - # hs_lm_head = self.llm_model.lm_head(hs_norm).float() - - # # Use the sampling function with default settings - # with torch.no_grad(): - # output_token = sample_logits( - # hs_lm_head[:, -1, :], - # TEMP, - # TOP_P, - # TOP_K - # ).cpu().numpy().flatten() - - # if DEBUG >= 2: - # print(f"hs_norm: {hs_norm}") - # print(f"hs_lm_head: {hs_lm_head}") - # print(f"output_token: {output_token}") - - # return output_token - - # return hidden_states.cpu().numpy() - - # def forward_layers_cached( - # self, - # input_data: torch.tensor, - # past_kvs - # ) -> Tuple[np.ndarray, list]: - # """ - # Forward pass through the specified layers. 
- # With caching - - # Note: past_key_values not working for model, might be a library bug - # """ - - # if not past_kvs: - # past_kvs = DynamicCache() - # else: - # past_kvs = DynamicCache.from_legacy_cache(past_kvs) - - # if DEBUG >= 2: - # print("forward_layer call") - # print(f"input_data: {input_data}") - # print(f"shard {self.shard.to_dict()}") - # print(f"past_kvs: {past_kvs}") - - # input_ids = input_data.to(self.device) - # position_ids = None - # # position_embeddings = None - - # inputs_embeds = self.embed_tokens(input_ids) - - # if self.shard.is_first_layer(): - # hidden_states = self.embed_tokens(hidden_states) - - # if DEBUG >= 2: - # print(f"hidden_states: {hidden_states}") - # print(f"hidden_states.size(): {hidden_states.size()}") - - # batch_size, seq_len = input_data.size() - # position_ids = torch.arange(seq_len, dtype=torch.long, device=self.device).unsqueeze(0).expand(batch_size, -1) - - # # check if model does not have rotary emb - # # have to apply rotary per model - # # embedding seems very model specific and using position_ids - # # seems more universal, even though some give warning about it - # # if re.match(r"Qwen|qwen", self.shard.model_id): - # # import transformers.models.qwen2.modeling_qwen2 as qwen2 - # # position_embeddings = - # # q=hidden_states, - # # position_ids=position_ids - # # ) - # # else: - # # position_embeddings = self.base_model.rotary_emb( - # # hidden_states, - # # position_ids - # # ) - - # # if DEBUG >= 2: - # # print(f"embedded hidden_states {hidden_states}") - # # print(f"position_ids: {position_embeddings}") - - - # # Forward pass through the layer - # if DEBUG >= 2: - # print(f"IN hidden_states {hidden_states}") - # print(f"past_kvs {past_kvs}") - - # layer_outputs = self.base_model( - # hidden_states, - # position_ids=position_ids, - # past_key_values=past_kvs, - # use_cache=True - # ) - - # if DEBUG >= 2: - # print(f"\nlayer_outputs: {layer_outputs}") - - # hidden_states = layer_outputs.last_hidden_state - # present_kvs = layer_outputs.past_key_values - - # print(f"2 is_last_layer {self.shard.is_last_layer()}") - # if self.shard.is_last_layer(): - # hs_norm = self.norm(hidden_states) - # hs_lm_head = self.llm_model.lm_head(hs_norm).float() - - # # Use the sampling function with default settings - # with torch.no_grad(): - # output_token = sample_logits( - # hs_lm_head[:, -1, :], - # TEMP, - # TOP_P, - # TOP_K - # ).cpu().numpy().flatten() - - # if DEBUG >= 2: - # print(f"hs_norm: {hs_norm}") - # print(f"hs_lm_head: {hs_lm_head}") - # print(f"output_token: {output_token}") - - # return (output_token, present_kvs) - - # return (hidden_states.cpu().numpy(), present_kvs) \ No newline at end of file + ) \ No newline at end of file diff --git a/exo/inference/tokenizers.py b/exo/inference/tokenizers.py index e0bc332d9..9accd9436 100644 --- a/exo/inference/tokenizers.py +++ b/exo/inference/tokenizers.py @@ -19,7 +19,11 @@ async def resolve_tokenizer(model_id: str): async def _resolve_tokenizer(model_id_or_local_path: str): try: if DEBUG >= 4: print(f"Trying AutoProcessor for {model_id_or_local_path}") - processor = AutoProcessor.from_pretrained(model_id_or_local_path, use_fast=True if "Mistral-Large" in model_id_or_local_path else False) + if "Mistral-Large" in str(model_id_or_local_path): + use_fast = True + else: + use_fast = False + processor = AutoProcessor.from_pretrained(model_id_or_local_path, use_fast=use_fast) if not hasattr(processor, 'eos_token_id'): processor.eos_token_id = getattr(processor, 'tokenizer', getattr(processor, 
'_tokenizer', processor)).eos_token_id if not hasattr(processor, 'encode'): From 0ae716de1e5d977ae1248bfc2bab201f315fb8b8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 27 Aug 2024 08:23:53 -0800 Subject: [PATCH 319/589] fixing device --- exo/inference/pytorch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 9d8990d7a..8c0f0d53f 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -29,7 +29,7 @@ def __init__(self, shard: Shard, tokenizer: any): torch_dtype=torch.float32, device_map="auto", offload_buffers=True - ) + ).to(self.device) # disk_offload(model=self.llm_model, offload_dir="./.offload") @@ -116,7 +116,7 @@ def forward( past_key_value=past_kvs, use_cache=True, cache_position=cache_position, - ) + ).to(self.device) hidden_states = layer_outputs[0] next_kvs = layer_outputs[1] From 7705639ec91e1e2bcaf02eabe594b45314ef818f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 27 Aug 2024 12:07:34 -0800 Subject: [PATCH 320/589] adding smaller model test --- exo/inference/pytorch/inference.py | 9 +- exo/inference/pytorch/model/hf.py | 6 +- .../pytorch/test_inference_engine.py | 90 +++++++++++-------- 3 files changed, 61 insertions(+), 44 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 014b71691..878fb5fd1 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -108,6 +108,11 @@ async def infer_tensor( input_data, device=self.device ).unsqueeze(0).long() + else: + in_tensor = torch.tensor( + input_data, + device=self.device + ).long() in_tensor = self.model.embed_tokens(in_tensor) @@ -175,8 +180,8 @@ async def ensure_shard(self, shard: Optional[Shard]): Args: shard (Optional[Shard]): Shard information for the model. """ - if self.shard == shard: - return + # if self.shard == shard: + # return if DEBUG >= 4: print(f"Loading new shard: {shard}") diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 8c0f0d53f..f3572dc5a 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -28,8 +28,8 @@ def __init__(self, shard: Shard, tokenizer: any): shard.model_id, torch_dtype=torch.float32, device_map="auto", - offload_buffers=True - ).to(self.device) + # offload_buffers=True + ) # disk_offload(model=self.llm_model, offload_dir="./.offload") @@ -116,7 +116,7 @@ def forward( past_key_value=past_kvs, use_cache=True, cache_position=cache_position, - ).to(self.device) + ) hidden_states = layer_outputs[0] next_kvs = layer_outputs[1] diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 8d02e6343..bacf53bcc 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -13,31 +13,32 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e # prompt = "Why is the sky blue?" prompt = "In a single word only, what is the last name of the current president of the USA?" 
- shard = Shard( - model_id=model_id, - start_layer=0, - end_layer=n_layers-1, - n_layers=n_layers - ) - - resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( - "A", - shard=shard, - prompt=prompt - ) - - print(f"resp_full: {resp_full}") - - next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( - "A", - shard=shard, - input_data=resp_full, - inference_state=inference_state_full, - ) - - print(f"next_resp_full: {next_resp_full}") + # shard = Shard( + # model_id=model_id, + # start_layer=0, + # end_layer=n_layers-1, + # n_layers=n_layers + # ) + + # resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( + # "A", + # shard=shard, + # prompt=prompt + # ) + + # print(f"resp_full: {resp_full}") + + # next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( + # "A", + # shard=shard, + # input_data=resp_full, + # inference_state=inference_state_full, + # ) + + # print(f"next_resp_full: {next_resp_full}") pp = int(n_layers/2) + resp_shard = Shard( model_id=model_id, start_layer=0, @@ -65,19 +66,19 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e inference_state=inference_state_1, ) - resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( - "B", - shard=resp_shard, - input_data=resp2, - inference_state=inference_state_2, - ) + # resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( + # "B", + # shard=resp_shard, + # input_data=resp2, + # inference_state=inference_state_2, + # ) - resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( - "B", - shard=resp_shard2, - input_data=resp3, - inference_state=inference_state_3, - ) + # resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( + # "B", + # shard=resp_shard2, + # input_data=resp3, + # inference_state=inference_state_3, + # ) assert np.array_equal(resp_full, resp2) assert np.array_equal(next_resp_full, resp4) @@ -116,14 +117,25 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e # except Exception as err: # print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") + # try: + # print(f"\n\n ------- TEST Chickaboo/ChickaQ-Large -----\n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "Chickaboo/ChickaQ-Large", + # 24 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! Chickaboo/ChickaQ-Large TEST FAILED \n{err}\n") + try: - print(f"\n\n ------- TEST Chickaboo/ChickaQ-Large -----\n\n") + print(f"\n\n --------- TEST ambrosfitz/TinyLlama-1.1B-Chat-yawp -------\n\n") asyncio.run(test_inference_engine( PyTorchDynamicShardInferenceEngine(HFShardDownloader()), PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "Chickaboo/ChickaQ-Large", - 24 + "ambrosfitz/TinyLlama-1.1B-Chat-yawp", + 22 )) except Exception as err: - print(f"\n\n !!!!!!!!!!! Chickaboo/ChickaQ-Large TEST FAILED \n{err}\n") + print(f"\n\n !!!!!!!!!!! 
ambrosfitz/TinyLlama-1.1B-Chat-yawp TEST FAILED \n{err}\n") From 81d597db2b4075b4725308881ed2e577df9bcb5e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 27 Aug 2024 16:30:30 -0800 Subject: [PATCH 321/589] testing --- exo/inference/pytorch/inference.py | 18 ++++++++---------- exo/inference/pytorch/model/hf.py | 2 ++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 878fb5fd1..c6ba8e528 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -103,16 +103,14 @@ async def infer_tensor( current_kvs = None - if input_data.size == 1: - in_tensor = torch.tensor( - input_data, - device=self.device - ).unsqueeze(0).long() - else: - in_tensor = torch.tensor( - input_data, - device=self.device - ).long() + # if input_data.size == 1: + # in_tensor = torch.from_numpy( + # input_data + # ).unsqueeze(0).long() + # else: + in_tensor = torch.from_numpy( + input_data + ).long() in_tensor = self.model.embed_tokens(in_tensor) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index f3572dc5a..aa2873c56 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -108,6 +108,8 @@ def forward( if DEBUG >= 4: print("Going through layer") print(f"{decoder_layer}") + print("input_ids") + print(f"{input_ids}") layer_outputs = decoder_layer( input_ids, From f1d3e311790962f390e044840c238e293549588d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 27 Aug 2024 16:38:01 -0800 Subject: [PATCH 322/589] added tinyllama --- exo/inference/pytorch/inference.py | 16 ++++++++-------- exo/models.py | 3 +++ tinychat/examples/tinychat/index.html | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index c6ba8e528..ba834eb67 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -103,14 +103,14 @@ async def infer_tensor( current_kvs = None - # if input_data.size == 1: - # in_tensor = torch.from_numpy( - # input_data - # ).unsqueeze(0).long() - # else: - in_tensor = torch.from_numpy( - input_data - ).long() + if input_data.size == 1: + in_tensor = torch.from_numpy( + input_data, + ).unsqueeze(0).long().to(self.device) + else: + in_tensor = torch.from_numpy( + input_data + ).long().to(self.device) in_tensor = self.model.embed_tokens(in_tensor) diff --git a/exo/models.py b/exo/models.py index 72a5b5667..137b881ce 100644 --- a/exo/models.py +++ b/exo/models.py @@ -26,6 +26,9 @@ "llama-3-1B-Base": { "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-1B-Base", start_layer=0, end_layer=0, n_layers=3), }, + "TinyLlama-1.1B-Chat-yaw": { + "PyTorchDynamicShardInferenceEngine": Shard(model_id="ambrosfitz/TinyLlama-1.1B-Chat-yawp", start_layer=0, end_layer=0, n_layers=22), + }, ### mistral "mistral-nemo": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40),}, "mistral-large": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Large-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=88),}, diff --git a/tinychat/examples/tinychat/index.html b/tinychat/examples/tinychat/index.html index 8ff4c64c7..350cea178 100644 --- a/tinychat/examples/tinychat/index.html +++ b/tinychat/examples/tinychat/index.html @@ -27,7 +27,7 @@
+
Date: Tue, 27 Aug 2024 18:12:12 -0800 Subject: [PATCH 325/589] adding A10, adding test --- .../pytorch/test_inference_engine.py | 20 +++++++++---------- exo/topology/device_capabilities.py | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 41ff337af..4bad37c26 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -106,16 +106,16 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e # except Exception as err: # print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") - # try: - # print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "meta-llama/Meta-Llama-3.1-8B", - # 32 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") + try: + print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "meta-llama/Meta-Llama-3.1-8B", + 32 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") # try: # print(f"\n\n ------- TEST Chickaboo/ChickaQ-Large -----\n\n") diff --git a/exo/topology/device_capabilities.py b/exo/topology/device_capabilities.py index 51db53ef2..bed1b5107 100644 --- a/exo/topology/device_capabilities.py +++ b/exo/topology/device_capabilities.py @@ -108,6 +108,7 @@ def to_dict(self): "NVIDIA T1000 8GB": DeviceFlops(fp32=2.5 * TFLOPS, fp16=5.0 * TFLOPS, int8=10.0 * TFLOPS), "Quadro M2000": DeviceFlops(fp32=0.5 * TFLOPS, fp16=1.0 * TFLOPS, int8=2.0 * TFLOPS), "Quadro P400": DeviceFlops(fp32=0.641 * TFLOPS, fp16=1.282 * TFLOPS, int8=2.564 * TFLOPS), + "NVIDIA A10": DeviceFlops(fp32=31.2 * TFLOPS, fp16=62.5 * TFLOPS, int8=2.5 * TFLOPS), # ... add more devices if needed ... ### AMD GPUs # RX 6000 series From ed5bea79251af4661d8d89a8c5dd2ba91542dc7d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 27 Aug 2024 18:20:56 -0800 Subject: [PATCH 326/589] removing reloading of shard, changing temp and top_p --- exo/inference/pytorch/inference.py | 23 ++--------------------- exo/inference/pytorch/model/hf.py | 4 ++-- 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index ba834eb67..063a9e4a3 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -178,31 +178,12 @@ async def ensure_shard(self, shard: Optional[Shard]): Args: shard (Optional[Shard]): Shard information for the model. 
""" - # if self.shard == shard: - # return + if self.shard == shard: + return if DEBUG >= 4: print(f"Loading new shard: {shard}") - if self.model: - if DEBUG >= 2: - print(f"\nCLEARING MODEL {shard.model_id}\n") - print(f"before allocated: {torch.cuda.memory_allocated()}") - print(f"before reserved: {torch.cuda.memory_reserved()}") - - # delete model and free up memory to reload - # self.model.cuda() - # disk_offload(model=self.model, offload_dir="./.offload") - import gc - - del self.model - gc.collect() - torch.cuda.empty_cache() - - if DEBUG >= 2: - print(f"after allocated: {torch.cuda.memory_allocated()}") - print(f"after reserved: {torch.cuda.memory_reserved()}") - self.shard = shard self.tokenizer = await resolve_tokenizer(shard.model_id) self.model = ShardedHuggingFaceModel(shard, self.tokenizer) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 074cc53c2..ed9e6ae17 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -7,8 +7,8 @@ from exo.inference.pytorch.model.utils import sample_logits TOP_P = 0.9 #0.95 -TOP_K = 20 -TEMP = 0.8 +TOP_K = 25 +TEMP = 0.85 class ShardedHuggingFaceModel(torch.nn.Module): def __init__(self, shard: Shard, tokenizer: any): From 032c9b1db7ba3df80826703f08010d312e18174d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 31 Aug 2024 21:21:12 -0800 Subject: [PATCH 327/589] rewrite of sharded model using new split testing of huggingface models --- exo/api/chatgpt_api.py | 16 +- exo/inference/pytorch/inference.py | 96 +++++---- .../pytorch/model/archive/hf_manual.py | 203 ++++++++++++++++++ .../pytorch/model/{ => archive}/utils.py | 0 exo/inference/pytorch/model/hf.py | 149 +++---------- .../pytorch/test_inference_engine.py | 51 +++-- exo/inference/pytorch/test_inference_loop.py | 105 +++++++++ exo/inference/pytorch/test_split_model.py | 108 ++++++++++ 8 files changed, 533 insertions(+), 195 deletions(-) create mode 100644 exo/inference/pytorch/model/archive/hf_manual.py rename exo/inference/pytorch/model/{ => archive}/utils.py (100%) create mode 100644 exo/inference/pytorch/test_inference_loop.py create mode 100644 exo/inference/pytorch/test_split_model.py diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py index 1abda85fe..2619d1635 100644 --- a/exo/api/chatgpt_api.py +++ b/exo/api/chatgpt_api.py @@ -58,6 +58,9 @@ def generate_completion( "finish_reason": finish_reason, }], } + + if DEBUG >= 3: + print(f"completion: {completion}") if not stream: completion["usage"] = { @@ -113,16 +116,9 @@ def remap_messages(messages: List[Message]) -> List[Message]: def build_prompt(tokenizer, _messages: List[Message]): - if len(_messages) == 1: - user_msg = _messages[0] - - # get instruct sys message - sys_msg = Message(role="system", content="You are a helpful assistant.") - - # restructure for sys_msg to go first - _messages = [sys_msg, user_msg] - messages = remap_messages(_messages) + if DEBUG >= 3: + print(f"messages: {messages}") prompt = tokenizer.apply_chat_template( messages, tokenize=False, @@ -140,7 +136,7 @@ def build_prompt(tokenizer, _messages: List[Message]): continue for content in message.content: - # note: we only support one image at a time right now. Multiple is possible. See: https://github.com/huggingface/transformers/blob/e68ec18ce224af879f22d904c7505a765fb77de3/docs/source/en/model_doc/llava.md?plain=1#L41 + # note: wae only support one image at time right now. Multiple is possible. 
See: https://github.com/huggingface/transformers/blob/e68ec18ce224af879f22d904c7505a765fb77de3/docs/source/en/model_doc/llava.md?plain=1#L41 # follows the convention in https://platform.openai.com/docs/guides/vision if isinstance(content, dict) and content.get("type", None) == "image": image_str = content.get("image", None) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 063a9e4a3..9334153c6 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -6,26 +6,28 @@ from typing import Optional, Tuple from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine -from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel +from exo.inference.pytorch.model.archive.hf_manual import ShardedHuggingFaceModel from exo.api.chatgpt_api import resolve_tokenizer from exo.helpers import DEBUG from transformers import DynamicCache from accelerate import disk_offload +from exo.download.shard_download import ShardDownloader class PyTorchDynamicShardInferenceEngine(InferenceEngine): """ PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. """ - def __init__(self, shard): + def __init__(self, shard_downloader: ShardDownloader): """ Initialize the inference engine. Args: debug (bool): If True, enables debug logging. Defaults to False. """ - self.shard = shard - self.model = None + self.shard = None + self.shard_downloader = shard_downloader + self.stateful_sharded_model = None self.tokenizer = None self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -37,33 +39,33 @@ async def infer_prompt( image_str: Optional[str] = None, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: + if DEBUG >= 4: + print("infer_prompt called") + print(f"prompt: {prompt}") await self.ensure_shard(shard) # need to make this so inference_state is not a string # cant use it with dynamic cache - tokens = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) - tokens = self.model.embed_tokens(tokens) + inputs = self.tokenizer(prompt, return_tensors="pt") + input_ids = inputs.input_ids.to(self.device) + + # add pad token if none + if self.tokenizer.pad_token == None: + self.tokenizer.add_special_tokens({"pad_token":""}) + self.stateful_sharded_model.base_model.resize_token_embeddings(len(self.tokenizer)) + current_kvs = None if DEBUG >= 4: - print("infer_prompt called") - print(f"tokens: {tokens}\n") + print(f"tokens: {input_ids}\n") print(f"layer_count: {self.shard.get_layer_count()}") print(f"is_first_layer: {self.shard.is_first_layer()}") print(f"is_last_layer: {self.shard.is_last_layer()}") - # convert inference_state or cache from json to DynamicCache - past_kv = DynamicCache() - if inference_state != None: - cache_dict = json.loads(inference_state) - past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] - past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] - - output_data, current_kvs = self.model.forward( - tokens, - past_kv + output_data = self.stateful_sharded_model.forward( + input_ids ) is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] @@ -98,31 +100,26 @@ async def infer_tensor( input_data: np.ndarray, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: + if DEBUG >= 3: + print("infer_tensor called") + print(f"input_data: {input_data}") + print(f"input_data.size: 
{input_data.size}") + print(f"input_data.shape: {input_data.shape}") + print(f"shard: {self.shard}") await self.ensure_shard(shard) current_kvs = None + if input_data.size == 1: - in_tensor = torch.from_numpy( - input_data, - ).unsqueeze(0).long().to(self.device) + in_tensor = torch.tensor([[input_data.item()]]).to(self.device) else: - in_tensor = torch.from_numpy( - input_data - ).long().to(self.device) + in_tensor = torch.tensor(input_data).to(self.device) - in_tensor = self.model.embed_tokens(in_tensor) - - if DEBUG >= 4: - print("infer_tensor called") - print(f"input_data: {input_data}") - print(f"input_data.size: {input_data.size}") - print(f"input_tensor: {in_tensor}\n") - print(f"shard: {self.shard}") - print(f"layer_count: {self.shard.get_layer_count()}") - print(f"is_first_layer: {self.shard.is_first_layer()}") - print(f"is_last_layer: {self.shard.is_last_layer()}") + # in_tensor = torch.tensor(input_data).to(self.device) + + # in_tensor = self.stateful_sharded_model.embed_tokens(in_tensor) # convert inference_state or cache from json to DynamicCache past_kv = DynamicCache() @@ -131,29 +128,33 @@ async def infer_tensor( cache_dict = json.loads(inference_state) past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] - - if DEBUG >= 4: - print("Loaded past_kv from JSON") - print(f"past_kv: {past_kv}") - print(f"past_kv.key_cache len: {len(past_kv.key_cache)}") - print(f"past_kv.value_cache len: {len(past_kv.value_cache)}") + past_kv_length = past_kv[0][0].shape[2] except json.JSONDecodeError: print(f"ERROR DECODING INFERENCE STATE") - output_data, current_kvs = self.model.forward( + if DEBUG >= 3: + # print(f"input_tensor: {in_tensor}") + print(f"layer_count: {self.shard.get_layer_count()}") + print(f"is_first_layer: {self.shard.is_first_layer()}") + print(f"is_last_layer: {self.shard.is_last_layer()}") + print(f"input_data.shape: {input_data.shape}") + + print(f"in_tensor: {in_tensor}") + output_data, current_kvs = self.stateful_sharded_model.forward( in_tensor, + None, past_kv ) is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] - if DEBUG >= 4: - print(f"in_tensor: {in_tensor}\n") + if DEBUG >= 3: print(f"output_data: {output_data}\n") print(f"output_data.size {output_data.size}\n") print(f"finished: {is_finished}") print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") print(f"output_data[-1] {output_data[-1]}") + print("====================================================") if output_data.size == 1: print(f"size 1 output_data.item() {output_data.item()}") @@ -184,9 +185,12 @@ async def ensure_shard(self, shard: Optional[Shard]): if DEBUG >= 4: print(f"Loading new shard: {shard}") - self.shard = shard + # need to build in shard downloader + # model_path = await self.shard_downloader.ensure_shard(shard) + self.tokenizer = await resolve_tokenizer(shard.model_id) - self.model = ShardedHuggingFaceModel(shard, self.tokenizer) + self.stateful_sharded_model = ShardedHuggingFaceModel(shard) + self.shard = shard if DEBUG >= 4: print(f"Shard loaded successfully: {shard}") \ No newline at end of file diff --git a/exo/inference/pytorch/model/archive/hf_manual.py b/exo/inference/pytorch/model/archive/hf_manual.py new file mode 100644 index 000000000..e5af2eaf8 --- /dev/null +++ b/exo/inference/pytorch/model/archive/hf_manual.py @@ -0,0 +1,203 @@ +# Attempted version to recreate manually using LlamaModel and 
others +# BROKEN +import torch +import numpy as np +from transformers import AutoModelForCausalLM, DynamicCache, Cache, AutoModel +from exo.inference.shard import Shard +from exo.helpers import DEBUG +from typing import Tuple, Optional, Union, List +from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask +from exo.inference.pytorch.model.archive.utils import sample_logits + +TOP_P = 0.7 #0.95 +TOP_K = 50 +TEMP = 0.01 + + +class ShardedHuggingFaceModel(torch.nn.Module): + def __init__(self, shard: Shard): + super(ShardedHuggingFaceModel, self).__init__() + + if torch.cuda.is_available(): + self.device = torch.device("cuda") + else: + self.device = torch.device("cpu") + + self.shard = shard + + # Load the model + try: + self.base_model = AutoModel.from_pretrained( + shard.model_id, + torch_dtype=torch.float32, + device_map="auto", + # offload_buffers=True + ) + + # disk_offload(model=self.base_model, offload_dir="./.offload") + except Exception as err: + print(f"Error loading model: {err}") + raise + + if DEBUG >= 2: + print(f"\nShardedHuggingFaceModel init with shard {shard}") + print(f"self.base_model: {self.base_model}") + + # Embeddings and final layer norm + # used for doing what forward LlamaModel does in transformers + self.norm = self.base_model.norm + self.lm_head = torch.nn.Linear( + self.base_model.config.hidden_size, + self.base_model.config.vocab_size, + bias=False + ).to(self.device) + self.embed_tokens = self.base_model.embed_tokens + + def forward( + self, + input_ids: torch.tensor, + attention_mask: torch.tensor = None, + past_kvs: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + ) -> Tuple[np.ndarray, any]: + """ + Forward through layers using the base model + + Args: + input_ids: tensor input + attention_mask: attention mask from tokenizer + past_kvs: past key value stores for cache + + Returns: + hidden_states: numpy of states between layers + or logits: numpy of normalization and linearization of last hidden state + past_kvs: DynamicCache of past key values if use_cache is true + + Ref: + https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/qwen2/modeling_qwen2.py#L804 + https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/llama/modeling_llama.py#L887 + """ + if DEBUG >= 4: + print("forward called") + print(f"input_ids: {input_ids}\n") + print(f"layer_count: {self.shard.get_layer_count()}") + print(f"is_first_layer: {self.shard.is_first_layer()}") + print(f"is_last_layer: {self.shard.is_last_layer()}") + + if self.shard.is_first_layer(): + if DEBUG >= 2: + print("first layer, embed") + print(f"input_ids: {input_ids}") + input_ids = self.embed_tokens(input_ids) + + if DEBUG >= 2: + print(f"embeded input_ids: {input_ids}") + + if attention_mask == None: + # get attention mask + past_kv_length = len(past_kvs) + batch_size, seq_length = input_ids.shape[:2] + attention_mask = _prepare_4d_causal_attention_mask( + None, (batch_size, seq_length), input_ids, past_kv_length + ) + + past_kvs = DynamicCache.from_legacy_cache(past_kvs) + past_seen_tokens = past_kvs.get_seq_length() if past_kvs is not None else 0 + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + input_ids.shape[1], + device=self.device + ) + + position_ids = cache_position.unsqueeze(0).to(self.device) + + try: + position_embeddings = self.base_model.rotary_emb( + input_ids, + position_ids + ) + except Exception as err: + print(f"rotary_emb not found in base_model") + position_embeddings = None + + 
causal_mask = self.base_model._update_causal_mask( + attention_mask, + input_ids, + cache_position, + past_kvs, + self.base_model.config.output_attentions + ) + + # progress through layers + for i in range(self.shard.start_layer, self.shard.end_layer + 1): + decoder_layer = self.base_model.layers[i] + + if DEBUG >= 4: + print("Going through layer") + print(f"{decoder_layer}") + print("input_ids") + print(f"{input_ids}") + print("causal_mask") + print(f"{causal_mask}") + + try: + layer_outputs = decoder_layer( + input_ids, + attention_mask=causal_mask, + position_ids=position_ids, + position_embeddings=position_embeddings, + past_key_value=past_kvs, + use_cache=True, + cache_position=cache_position, + output_logits=True + ) + except Exception as err: + print(f"Going through layer failed: {err}") + print(err.__traceback__.tb_lineno) + raise + + hidden_states = layer_outputs[0] + next_kvs = layer_outputs[1] + + if DEBUG >= 3: + print(f"layer_outputs {layer_outputs}") + print(layer_outputs[1:]) + + if self.shard.is_last_layer(): + hs_norm = self.norm(hidden_states).to(self.device) + # hs_lm_head = self.base_model.lm_head(hs_norm).float() + + # Use the sampling function with default settings + with torch.no_grad(): + logits = self.lm_head( + hs_norm[:, -1:, :] + ).to(self.device).float() + + if DEBUG >= 2: + print(f"hs_norm: {hs_norm}") + # print(f"hs_lm_head: {hs_lm_head}") + print(f"logits: {logits}") + print(f"logits.shape: {logits.shape}") + + # output_token = sample_logits( + # logits, + # TEMP, + # TOP_P, + # TOP_K + # ).unsqueeze(0).unsqueeze(0).long() + + output_token = torch.distributions.Categorical( + logits=logits + ).sample(sample_shape=(1,)) + + if DEBUG >= 2: + print(f"output_token: {output_token}") + + return (output_token.numpy(force=True), next_kvs) + + with torch.no_grad(): + out_hidden_states = hidden_states.float().numpy(force=True) + + return ( + out_hidden_states, + next_kvs + ) \ No newline at end of file diff --git a/exo/inference/pytorch/model/utils.py b/exo/inference/pytorch/model/archive/utils.py similarity index 100% rename from exo/inference/pytorch/model/utils.py rename to exo/inference/pytorch/model/archive/utils.py diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index ed9e6ae17..0812af6ed 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,155 +1,56 @@ import torch +import torch.nn as nn import numpy as np -from transformers import AutoModelForCausalLM, DynamicCache, Cache +from transformers import AutoModelForCausalLM from exo.inference.shard import Shard from exo.helpers import DEBUG +from exo.inference.inference_engine import InferenceEngine +from exo.download.shard_download import ShardDownloader from typing import Tuple, Optional, Union, List -from exo.inference.pytorch.model.utils import sample_logits -TOP_P = 0.9 #0.95 -TOP_K = 25 -TEMP = 0.85 - -class ShardedHuggingFaceModel(torch.nn.Module): - def __init__(self, shard: Shard, tokenizer: any): - super(ShardedHuggingFaceModel, self).__init__() +class ShardedHuggingFaceModel(InferenceEngine): + def __init__(self, shard: Shard): + self.shard = shard if torch.cuda.is_available(): self.device = torch.device("cuda") - else: + self.torch_dtype = torch.float32 + elif torch.backends.mps.is_available(): + self.device = torch.device("mps") + self.torch_dtype = torch.float32 + else: self.device = torch.device("cpu") + self.torch_dtype = torch.float16 - self.shard = shard - self.tokenizer = tokenizer - - # Load the model try: - self.llm_model 
= AutoModelForCausalLM.from_pretrained( + self.base_model = AutoModelForCausalLM.from_pretrained( shard.model_id, torch_dtype=torch.float32, - device_map="auto", - # offload_buffers=True + device_map="auto" ) - - # disk_offload(model=self.llm_model, offload_dir="./.offload") - - self.base_model = self.llm_model.model except Exception as err: - print(f"Error loading model: {err}") + print(f"error loading model: {err}") raise - if DEBUG >= 2: - print(f"\nShardedHuggingFaceModel init with shard {shard}") - print(f"self.llm_model: {self.llm_model}") - print(f"self.base_model: {self.base_model}") - - if DEBUG >= 2: - print(f"full_model.model layer: {len(self.base_model.layers)}") - - # Embeddings and final layer norm - # used for doing what forward LlamaModel does in transformers - self.norm = self.base_model.norm - self.lm_head = self.llm_model.lm_head - self.embed_tokens = self.base_model.embed_tokens - def forward( self, - input_ids: torch.tensor, - past_kvs: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + input_ids: torch.tensor ) -> Tuple[np.ndarray, any]: """ Forward through layers using the base model Args: input_ids: tensor input - past_kvs: past key value stores for cache - use_cache: use cache - - Returns: - hidden_states: numpy of states between layers - or logits: numpy of normalization and linearization of last hidden state - past_kvs: DynamicCache of past key values if use_cache is true - Ref: - https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/qwen2/modeling_qwen2.py#L804 - https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/llama/modeling_llama.py#L887 + Returns: + generator_ids: token ids from generation """ - if DEBUG >= 4: - print("forward called") - print(f"input_ids: {input_ids}\n") - print(f"layer_count: {self.shard.get_layer_count()}") - print(f"is_first_layer: {self.shard.is_first_layer()}") - print(f"is_last_layer: {self.shard.is_last_layer()}") - - past_kvs = DynamicCache.from_legacy_cache(past_kvs) - past_seen_tokens = past_kvs.get_seq_length() if past_kvs is not None else 0 - - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + input_ids.shape[1], - device=input_ids.device - ).to(self.device) - - position_ids = cache_position.unsqueeze(0).to(self.device) - - try: - position_embeddings = self.base_model.rotary_emb( - input_ids, - position_ids - ) - except Exception as err: - print(f"rotary_emb not found in base_model") - position_embeddings = None - - # progress through layers - for i in range(self.shard.start_layer, self.shard.end_layer + 1): - decoder_layer = self.base_model.layers[i] - - if DEBUG >= 4: - print("Going through layer") - print(f"{decoder_layer}") - print("input_ids") - print(f"{input_ids}") - - layer_outputs = decoder_layer( - input_ids, - position_ids=position_ids if not position_embeddings else None, - position_embeddings=position_embeddings, - past_key_value=past_kvs, - use_cache=True, - cache_position=cache_position, - ) - - hidden_states = layer_outputs[0] - next_kvs = layer_outputs[1] - - if DEBUG >= 3: - print(f"layer_outputs {layer_outputs}") - - if self.shard.is_last_layer(): - hs_norm = self.norm(hidden_states) - hs_lm_head = self.llm_model.lm_head(hs_norm).float() - - # Use the sampling function with default settings - with torch.no_grad(): - output_token = sample_logits( - hs_lm_head[:, -1, :], - TEMP, - TOP_P, - TOP_K - ).numpy(force=True).flatten() - if DEBUG >= 2: - print(f"hs_norm: {hs_norm}") - print(f"hs_lm_head: {hs_lm_head}") - 
print(f"output_token: {output_token}") + torch_dtype = + self.model = AutoModelForCausalLM.from_pretrained( + self.shard.model_id, + torch_dtype=torch.float32, + device_map="auto", + ) - return (output_token, next_kvs) - - with torch.no_grad(): - out_hidden_states = hidden_states.numpy(force=True) - return ( - out_hidden_states, - next_kvs - ) \ No newline at end of file diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/test_inference_engine.py index 4bad37c26..9b8a19ef6 100644 --- a/exo/inference/pytorch/test_inference_engine.py +++ b/exo/inference/pytorch/test_inference_engine.py @@ -26,7 +26,9 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e prompt=prompt ) - print(f"resp_full: {resp_full}") + print("\n------------resp_full---------------\n") + print(resp_full) + print("\n------------resp_full---------------\n") next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( "A", @@ -35,7 +37,9 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e inference_state=inference_state_full, ) - print(f"next_resp_full: {next_resp_full}") + print("\n------------next_resp_full---------------\n") + print(next_resp_full) + print("\n------------next_resp_full---------------\n") pp = int(n_layers/2) @@ -59,6 +63,11 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e prompt=prompt ) + print("\n------------resp1---------------\n") + print(resp1) + print("\n------------resp1---------------\n") + + resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( "B", shard=resp_shard2, @@ -66,6 +75,10 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e inference_state=inference_state_1, ) + print("\n------------resp2---------------\n") + print(resp2) + print("\n------------resp2---------------\n") + resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( "B", shard=resp_shard, @@ -73,6 +86,10 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e inference_state=inference_state_2, ) + print("\n------------resp3---------------\n") + print(resp3) + print("\n------------resp3---------------\n") + resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( "B", shard=resp_shard2, @@ -80,6 +97,10 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e inference_state=inference_state_3, ) + print("\n------------resp4---------------\n") + print(resp4) + print("\n------------resp4---------------\n") + assert np.array_equal(resp_full, resp2) assert np.array_equal(next_resp_full, resp4) @@ -106,16 +127,16 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e # except Exception as err: # print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") - try: - print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "meta-llama/Meta-Llama-3.1-8B", - 32 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! 
META LLAMA 3.1 8B TEST FAILED \n{err}\n") + # try: + # print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "meta-llama/Meta-Llama-3.1-8B", + # 32 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") # try: # print(f"\n\n ------- TEST Chickaboo/ChickaQ-Large -----\n\n") @@ -129,13 +150,13 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e # print(f"\n\n !!!!!!!!!!! Chickaboo/ChickaQ-Large TEST FAILED \n{err}\n") try: - print(f"\n\n --------- TEST ambrosfitz/TinyLlama-1.1B-Chat-yawp -------\n\n") + print(f"\n\n --------- TEST TinyLlama/TinyLlama_v1.1 -------\n\n") asyncio.run(test_inference_engine( PyTorchDynamicShardInferenceEngine(HFShardDownloader()), PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "ambrosfitz/TinyLlama-1.1B-Chat-yawp", + "TinyLlama/TinyLlama_v1.1", 22 )) except Exception as err: - print(f"\n\n !!!!!!!!!!! ambrosfitz/TinyLlama-1.1B-Chat-yawp TEST FAILED \n{err}\n") + print(f"\n\n !!!!!!!!!!! TinyLlama/TinyLlama_v1.1 TEST FAILED \n{err}\n") diff --git a/exo/inference/pytorch/test_inference_loop.py b/exo/inference/pytorch/test_inference_loop.py new file mode 100644 index 000000000..a61b43427 --- /dev/null +++ b/exo/inference/pytorch/test_inference_loop.py @@ -0,0 +1,105 @@ + +import asyncio +from exo.inference.shard import Shard +from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine +from exo.download.hf.hf_shard_download import HFShardDownloader +from exo.inference.inference_engine import InferenceEngine +from exo.inference.shard import Shard +from exo.helpers import DEBUG +import os +import numpy as np + +async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str, n_layers: int): + # prompt = "Why is the sky blue?" + prompt = "In a single word only, what is the last name of the current president of the USA?" + + shard = Shard( + model_id=model_id, + start_layer=0, + end_layer=n_layers-1, + n_layers=n_layers + ) + + resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( + "A", + shard=shard, + prompt=prompt + ) + + print("\n------------resp_full---------------\n") + print(resp_full) + print("\n------------resp_full---------------\n") + + next_resp_full = resp_full + is_finished = False + while not is_finished: + next_resp_full, _next_inference_state_full, is_finished = await inference_engine_1.infer_tensor( + "A", + shard=shard, + input_data=next_resp_full, + inference_state=inference_state_full, + ) + + print("\n------------next_resp_full---------------\n") + print(next_resp_full) + print("\n------------next_resp_full---------------\n") + + + + +if __name__ == '__main__': + # try: + # print(f"\n\n -------- TEST QWEN2 -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "Qwen/Qwen2-0.5B-Instruct", + # 24 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! 
QWEN2 TEST FAILED \n{err}\n") + + # try: + # print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "andrijdavid/Llama3-1B-Base", + # 3 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") + + # try: + # print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "meta-llama/Meta-Llama-3.1-8B", + # 32 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") + + # try: + # print(f"\n\n ------- TEST Chickaboo/ChickaQ-Large -----\n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "Chickaboo/ChickaQ-Large", + # 24 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! Chickaboo/ChickaQ-Large TEST FAILED \n{err}\n") + + try: + print(f"\n\n --------- TEST ambrosfitz/TinyLlama-1.1B-Chat-yawp -------\n\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "ambrosfitz/TinyLlama-1.1B-Chat-yawp", + 22 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! ambrosfitz/TinyLlama-1.1B-Chat-yawp TEST FAILED \n{err}\n") + diff --git a/exo/inference/pytorch/test_split_model.py b/exo/inference/pytorch/test_split_model.py new file mode 100644 index 000000000..35104fc16 --- /dev/null +++ b/exo/inference/pytorch/test_split_model.py @@ -0,0 +1,108 @@ +import torch +import torch.nn as nn +import asyncio +from transformers import AutoModelForCausalLM, AutoConfig +from exo.api.chatgpt_api import resolve_tokenizer + +async def model_split_test(prompt: str, model_id: str, layers: int): + # inference + tokenizer = await resolve_tokenizer(model_id) + max_length = tokenizer.model_max_length + + # get full model + model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.float32, + device_map="auto", + ) + + half_layers = int(layers/2) + + # Create a copy of all the layers + model_layers = model.model.layers + copy_layers = [] + for i in range(half_layers): + print(f"Copying layer {i}") + layer_to_copy = model_layers[i] + print(layer_to_copy) + + copy_layers.append(layer_to_copy) + + # load half layers back into model + module_copy_list = nn.ModuleList(copy_layers).to("cuda") + model.model.layers.load_state_dict( + module_copy_list.state_dict(), + strict=False + ) + + # generate first half + inputs = tokenizer(prompt, return_tensors="pt") + fhalf_generate_ids = model.generate( + inputs.input_ids.to("cuda"), + max_new_tokens=max_length/2 + ).to("cuda") + + print("fhalf_generate_ids") + print(fhalf_generate_ids) + + # generate other half + copy_layers = [] + for i in range(half_layers, layers): + print(f"Copying layer {i}") + layer_to_copy = model_layers[i] + print(layer_to_copy) + + copy_layers.append(layer_to_copy) + + # load half layers back into model + module_copy_list = nn.ModuleList(copy_layers).to("cuda") + model.model.layers.load_state_dict( + module_copy_list.state_dict(), + strict=False + ) + + # generate second half with first half + shalf_generate_ids = model.generate( + fhalf_generate_ids + ).to("cuda") + + 
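Because the half-sized ModuleList re-numbers its children from 0, loading its state_dict back with strict=False appears to overwrite the first half of the full stack rather than restrict generation to the chosen layers; later commits in this series switch to slicing the layer list directly. A minimal sketch of that direct-slicing approach (illustrative only, not the repo's code):

import torch.nn as nn

def run_layer_slice(model, inputs, start, end):
    full_layers = model.model.layers
    # swap in a view over the chosen decoder layers (no weight copying)
    model.model.layers = nn.ModuleList(full_layers[start:end])
    try:
        out = model(**inputs, output_hidden_states=True)
    finally:
        model.model.layers = full_layers          # restore the original stack
    return out.hidden_states[-1]                  # post-norm hidden state after the sliced stack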
print("generate_ids") + print(shalf_generate_ids) + print(tokenizer.eos_token_id) + + # decode second half + decode = tokenizer.batch_decode( + shalf_generate_ids, + skip_special_tokens=True, + clean_up_tokenization_spaces=False + )[0] + + print("decode") + print(decode) + +if __name__ == "__main__": + prompt = "In a single word only, what is the last name of the current president of the USA?" + + print("\n-------- Test TinyLlama/TinyLlama_v1.1 ----------\n") + model_id = "TinyLlama/TinyLlama_v1.1" + model_layers = 22 + + asyncio.run( + model_split_test( + prompt=prompt, + model_id=model_id, + layers=model_layers + ) + ) + + print("\n-------- Test meta-llama/Meta-Llama-3.1-8B ----------\n") + model_id = "meta-llama/Meta-Llama-3.1-8B" + model_layers = 32 + + asyncio.run( + model_split_test( + prompt=prompt, + model_id=model_id, + layers=model_layers + ) + ) From 626b2235074d7f047c0de7ff6b087cab30b27bcb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 1 Sep 2024 11:17:09 -0800 Subject: [PATCH 328/589] building out new hf.py class, testing qwen and llama3 8b --- exo/inference/pytorch/model/hf.py | 28 +++-- exo/inference/pytorch/test_split_model.py | 121 ++++++++++++++++------ 2 files changed, 108 insertions(+), 41 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 0812af6ed..2a5eefd33 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -9,7 +9,7 @@ from typing import Tuple, Optional, Union, List class ShardedHuggingFaceModel(InferenceEngine): - def __init__(self, shard: Shard): + def __init__(self, shard: Shard, ): self.shard = shard if torch.cuda.is_available(): @@ -25,11 +25,23 @@ def __init__(self, shard: Shard): try: self.base_model = AutoModelForCausalLM.from_pretrained( shard.model_id, - torch_dtype=torch.float32, + torch_dtype=self.torch_dtype, device_map="auto" ) + + # build layers from shard + layers = self.base_model.model.layers + copy_layers = nn.ModuleList( + [layers[i] for i in range(self.shard.start_layer, self.shard.end_layer + 1)] + ) + + # apply layers back to model + self.base_model.model.layers.load_state_dict( + copy_layers.state_dict(), + strict=False + ) except Exception as err: - print(f"error loading model: {err}") + print(f"error loading and splitting model: {err}") raise def forward( @@ -46,11 +58,7 @@ def forward( generator_ids: token ids from generation """ - torch_dtype = - self.model = AutoModelForCausalLM.from_pretrained( - self.shard.model_id, - torch_dtype=torch.float32, - device_map="auto", - ) - + generate_ids = self.base_model.generate( + input_ids, + ) \ No newline at end of file diff --git a/exo/inference/pytorch/test_split_model.py b/exo/inference/pytorch/test_split_model.py index 35104fc16..4046bb21e 100644 --- a/exo/inference/pytorch/test_split_model.py +++ b/exo/inference/pytorch/test_split_model.py @@ -1,20 +1,36 @@ import torch import torch.nn as nn import asyncio -from transformers import AutoModelForCausalLM, AutoConfig +import gc +from transformers import AutoModelForCausalLM, AutoConfig, Qwen2ForCausalLM from exo.api.chatgpt_api import resolve_tokenizer +import re async def model_split_test(prompt: str, model_id: str, layers: int): # inference tokenizer = await resolve_tokenizer(model_id) - max_length = tokenizer.model_max_length + max_length = 512 #tokenizer.model_max_length # get full model - model = AutoModelForCausalLM.from_pretrained( - model_id, - torch_dtype=torch.float32, - device_map="auto", - ) + if re.match(r"^Qwen|qwen", model_id): + model 
= Qwen2ForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.float32, + device_map="auto", + # attn_implementation="eager" + ) + else: + model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.float32, + device_map="auto", + ) + + # add pad token if none + # this is for llama based models, will add a check + if tokenizer.pad_token == None and re.match(r"Llama|llama", model_id): + tokenizer.add_special_tokens({"pad_token":""}) + model.resize_token_embeddings(len(tokenizer)) half_layers = int(layers/2) @@ -22,12 +38,14 @@ async def model_split_test(prompt: str, model_id: str, layers: int): model_layers = model.model.layers copy_layers = [] for i in range(half_layers): - print(f"Copying layer {i}") + # print(f"Copying layer {i}") layer_to_copy = model_layers[i] - print(layer_to_copy) + # print(layer_to_copy) copy_layers.append(layer_to_copy) + print(f"loading {len(copy_layers)} layers back to model") + # load half layers back into model module_copy_list = nn.ModuleList(copy_layers).to("cuda") model.model.layers.load_state_dict( @@ -36,24 +54,43 @@ async def model_split_test(prompt: str, model_id: str, layers: int): ) # generate first half - inputs = tokenizer(prompt, return_tensors="pt") + messages = [{"role": "user", "content": prompt}] + txt = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + + print(f"Generating from chat template\n{txt}") + + inputs = tokenizer([txt], return_tensors="pt") + input_ids = inputs.input_ids.to("cuda") + input_attention_mask = inputs.attention_mask.to("cuda") fhalf_generate_ids = model.generate( - inputs.input_ids.to("cuda"), - max_new_tokens=max_length/2 + input_ids, + # attention_mask=input_attention_mask, + max_length=int(max_length/2), + output_hidden_states=True + # output_attentions=True ).to("cuda") print("fhalf_generate_ids") print(fhalf_generate_ids) + # nptest = fhalf_generate_ids.numpy(force=True) + # print(f"nptest: {nptest}") + # generate other half copy_layers = [] for i in range(half_layers, layers): - print(f"Copying layer {i}") + # print(f"Copying layer {i}") layer_to_copy = model_layers[i] - print(layer_to_copy) + # print(layer_to_copy) copy_layers.append(layer_to_copy) + print(f"loading {len(copy_layers)} layers back to model") + # load half layers back into model module_copy_list = nn.ModuleList(copy_layers).to("cuda") model.model.layers.load_state_dict( @@ -62,13 +99,16 @@ async def model_split_test(prompt: str, model_id: str, layers: int): ) # generate second half with first half + print(f"Generating from hidden layers output fhalf_generate_ids") shalf_generate_ids = model.generate( - fhalf_generate_ids + fhalf_generate_ids, + # attention_mask=input_attention_mask, + max_length=max_length ).to("cuda") - print("generate_ids") + print("shalf_generate_ids") print(shalf_generate_ids) - print(tokenizer.eos_token_id) + # print(tokenizer.eos_token_id) # decode second half decode = tokenizer.batch_decode( @@ -80,12 +120,42 @@ async def model_split_test(prompt: str, model_id: str, layers: int): print("decode") print(decode) + # free model from memory + del model + gc.collect() + torch.cuda.empty_cache() + + if __name__ == "__main__": prompt = "In a single word only, what is the last name of the current president of the USA?" 
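The layer counts hard-coded next to each model id in __main__ can also be read from the checkpoint config, which keeps them from drifting when a model is swapped. A small sketch (the helper name is illustrative):

from transformers import AutoConfig

def num_layers(model_id: str) -> int:
    # 22 for TinyLlama/TinyLlama_v1.1, 32 for meta-llama/Meta-Llama-3.1-8B
    return AutoConfig.from_pretrained(model_id).num_hidden_layers

# example:
# asyncio.run(model_split_test(prompt=prompt, model_id=model_id, layers=num_layers(model_id)))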
- print("\n-------- Test TinyLlama/TinyLlama_v1.1 ----------\n") - model_id = "TinyLlama/TinyLlama_v1.1" - model_layers = 22 + # print("\n-------- Test TinyLlama/TinyLlama_v1.1 ----------\n") + # model_id = "TinyLlama/TinyLlama_v1.1" + # model_layers = 22 + + # asyncio.run( + # model_split_test( + # prompt=prompt, + # model_id=model_id, + # layers=model_layers + # ) + # ) + + # print("\n-------- Test meta-llama/Meta-Llama-3.1-8B ----------\n") + # model_id = "meta-llama/Meta-Llama-3.1-8B" + # model_layers = 32 + + # asyncio.run( + # model_split_test( + # prompt=prompt, + # model_id=model_id, + # layers=model_layers + # ) + # ) + + print("\n-------- Test Qwen/Qwen2-0.5B-Instruct ----------\n") + model_id = "Qwen/Qwen2-0.5B-Instruct" + model_layers = 24 asyncio.run( model_split_test( @@ -95,14 +165,3 @@ async def model_split_test(prompt: str, model_id: str, layers: int): ) ) - print("\n-------- Test meta-llama/Meta-Llama-3.1-8B ----------\n") - model_id = "meta-llama/Meta-Llama-3.1-8B" - model_layers = 32 - - asyncio.run( - model_split_test( - prompt=prompt, - model_id=model_id, - layers=model_layers - ) - ) From f983e9347eac426ed357f011e8ee9e391d84c007 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 3 Sep 2024 19:26:36 -0800 Subject: [PATCH 329/589] trying to load in weights but transformers/pytorch doesnt allow that unless wanting to rebuild the whole model --- exo/inference/pytorch/.gitignore | 1 + exo/inference/pytorch/model/archive/utils.py | 83 ------ exo/inference/pytorch/test_simple_model.py | 40 +++ exo/inference/pytorch/test_split_model.py | 277 +++++++++++-------- exo/inference/pytorch/test_weight_load.py | 206 ++++++++++++++ exo/inference/pytorch/utils.py | 185 +++++++++++++ 6 files changed, 597 insertions(+), 195 deletions(-) create mode 100644 exo/inference/pytorch/.gitignore delete mode 100644 exo/inference/pytorch/model/archive/utils.py create mode 100644 exo/inference/pytorch/test_simple_model.py create mode 100644 exo/inference/pytorch/test_weight_load.py create mode 100644 exo/inference/pytorch/utils.py diff --git a/exo/inference/pytorch/.gitignore b/exo/inference/pytorch/.gitignore new file mode 100644 index 000000000..8fce60300 --- /dev/null +++ b/exo/inference/pytorch/.gitignore @@ -0,0 +1 @@ +data/ diff --git a/exo/inference/pytorch/model/archive/utils.py b/exo/inference/pytorch/model/archive/utils.py deleted file mode 100644 index df84b3977..000000000 --- a/exo/inference/pytorch/model/archive/utils.py +++ /dev/null @@ -1,83 +0,0 @@ -import torch -from torch.nn import functional as F - -def top_p_sampling(scaled_logits: torch.Tensor, top_p: float) -> torch.Tensor: - """ - Apply top-p (nucleus) sampling to logits. - - Args: - scaled_logits (torch.Tensor): The scaled logits from the model's output. - top_p (float): The cumulative probability threshold for top-p filtering. - temp (float): Temperature parameter for softmax distribution reshaping. - - Returns: - torch.Tensor: Token selected based on the top-p criterion. 
- - Ref: - https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/sample_utils.py#L67C1-L97C17 - """ - scaled_logits = torch.where(torch.isnan(scaled_logits), torch.zeros_like(scaled_logits), scaled_logits) - scaled_logits = torch.where(torch.isinf(scaled_logits), torch.full_like(scaled_logits, 1e6), scaled_logits) - - probs = torch.softmax(scaled_logits, dim=-1) - - sorted_probs, sorted_indices = torch.sort( - probs, - descending=True, - dim=-1 - ) - - cumulative_probs = torch.cumsum(sorted_probs, dim=-1) - mask = cumulative_probs > top_p - - top_probs = torch.where(mask, torch.zeros_like(sorted_probs), sorted_probs) - sum_probs = top_probs.sum(dim=-1, keepdim=True) - top_probs = torch.where(sum_probs > 0, top_probs / sum_probs, torch.ones_like(top_probs) / top_probs.size(-1)) - - if torch.isnan(top_probs).any() or torch.isinf(top_probs).any(): - print("Warning: Top probabilities contain NaN or Inf values after normalization") - top_probs = torch.where(torch.isnan(top_probs) | torch.isinf(top_probs), - 1.0 / top_probs.size(-1), - top_probs) - - sorted_token = torch.multinomial(top_probs, num_samples=1) - - token = sorted_indices.gather(-1, sorted_token) - - return token.squeeze(-1) - -def sample_logits(logits, temp, top_p, top_k): - """ - Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. - - Args: - logits (torch.Tensor): The logits distribution to sample from. - temp (float): temp for scaling logits. - top_p (float): The cumulative probability threshold for nucleus sampling. - - Returns: - torch.Tensor: The selected token index. - """ - - # Ensure logits are float - logits = logits.float() - - # If temp is very low, just use argmax - if temp == 0: - return logits.argmax(dim=-1) - - scaled_logits = logits/temp - - # top k - if top_k > 0: - top_values, top_indices = torch.topk(scaled_logits, top_k, dim=-1) - scaled_logits = torch.zeros_like(logits).scatter_(-1, top_indices, top_values) - - # Top-p sampling - if 0 < top_p < 1.0: - return top_p_sampling(scaled_logits, top_p) - else: - # random distribution selection - probs = torch.softmax(scaled_logits, dim=-1) - rand_sample = torch.distributions.Categorical(probs) - return rand_sample.sample().squeeze() \ No newline at end of file diff --git a/exo/inference/pytorch/test_simple_model.py b/exo/inference/pytorch/test_simple_model.py new file mode 100644 index 000000000..81009d08e --- /dev/null +++ b/exo/inference/pytorch/test_simple_model.py @@ -0,0 +1,40 @@ +from transformers import AutoModelForCausalLM, AutoTokenizer +device = "cuda" # the device to load the model onto + +model = AutoModelForCausalLM.from_pretrained( + "Qwen/Qwen2-0.5B-Instruct", + torch_dtype="auto", + device_map="auto" +) +tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") + +prompt = "In a single word only, what is the last name of the current president of the USA?" 
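The archived sampling helpers deleted above centre on nucleus (top-p) filtering; as a point of reference while that code is being reworked, a minimal stand-alone top-p sampler can be written as below (a sketch, not the project's implementation):

import torch
import torch.nn.functional as F

def nucleus_sample(logits: torch.Tensor, temp: float = 0.7, top_p: float = 0.9) -> torch.Tensor:
    probs = F.softmax(logits / temp, dim=-1)
    sorted_probs, sorted_idx = torch.sort(probs, descending=True, dim=-1)
    cumulative = torch.cumsum(sorted_probs, dim=-1)
    # drop tokens whose preceding cumulative mass already exceeds top_p
    cutoff = cumulative - sorted_probs > top_p
    sorted_probs = sorted_probs.masked_fill(cutoff, 0.0)
    sorted_probs = sorted_probs / sorted_probs.sum(dim=-1, keepdim=True)
    choice = torch.multinomial(sorted_probs, num_samples=1)
    return sorted_idx.gather(-1, choice).squeeze(-1)   # token ids, shape [batch]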
+ +messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": prompt} +] +text = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True +) +model_inputs = tokenizer([text], return_tensors="pt").to(device) + +generated_ids = model.generate( + model_inputs.input_ids, + attention_mask=model_inputs.attention_mask, + max_new_tokens=512, + do_sample=True, + top_k=20 + #num_beams=5, + #early_stopping=True +) +generated_ids = [ + output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids) +] + +response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] + +print(f"Prompt: {prompt}\n") +print(f"Response: {response}\n") diff --git a/exo/inference/pytorch/test_split_model.py b/exo/inference/pytorch/test_split_model.py index 4046bb21e..242e5f484 100644 --- a/exo/inference/pytorch/test_split_model.py +++ b/exo/inference/pytorch/test_split_model.py @@ -4,9 +4,73 @@ import gc from transformers import AutoModelForCausalLM, AutoConfig, Qwen2ForCausalLM from exo.api.chatgpt_api import resolve_tokenizer +from typing import Tuple, Optional import re +from exo.inference.pytorch.utils import sample_logits, top_k_sampling + +TEMP = 0.6 +TOP_K = 60 + +class OnionHuggingFaceLM(): + def __init__(self, layers, is_last=False): + self.layers = layers + self.is_last = is_last + + def forward( + self, + model, + input_ids: torch.tensor=None, + hidden_states: torch.tensor=None, + attention_mask: torch.tensor=None, + **kwargs + ) -> Tuple[Optional[torch.tensor], Optional[torch.tensor]]: + + # set base model + base_model = model.model + + if input_ids is not None and hidden_states is not None: + print("You must either pass a hidden_state or input_ids but not both") + assert ValueError + + if input_ids is not None: + # embed + hidden_states = base_model.embed_tokens(input_ids) + position_ids = torch.arange( + 0, + input_ids.size(1), + device=input_ids.device + ).unsqueeze(0) + + if hidden_states is not None: + hidden_states = hidden_states + position_ids = torch.arange( + 0, + hidden_states.size(1), + device=hidden_states.device + ).unsqueeze(0) + + for layer in self.layers: + print(f"Processing hidden state from layer\n{layer}\n") + hidden_states = layer( + hidden_states, + position_ids=position_ids + )[0] + + if self.is_last: + norm_states = base_model.norm(hidden_states).to("cuda") + logits = model.lm_head(norm_states).to("cuda") + + return (None, logits) + + return (hidden_states, None) + +async def model_half_split_test(prompt: str, model_id: str, layers: int): + """ + Test for splitting in half + """ + + half_layers = int(layers / 2) -async def model_split_test(prompt: str, model_id: str, layers: int): # inference tokenizer = await resolve_tokenizer(model_id) max_length = 512 #tokenizer.model_max_length @@ -15,43 +79,31 @@ async def model_split_test(prompt: str, model_id: str, layers: int): if re.match(r"^Qwen|qwen", model_id): model = Qwen2ForCausalLM.from_pretrained( model_id, - torch_dtype=torch.float32, + torch_dtype="auto", device_map="auto", # attn_implementation="eager" + # low_cpu_mem_usage=True ) else: model = AutoModelForCausalLM.from_pretrained( model_id, - torch_dtype=torch.float32, + torch_dtype="auto", device_map="auto", + # low_cpu_mem_usage=True ) - # add pad token if none - # this is for llama based models, will add a check - if tokenizer.pad_token == None and re.match(r"Llama|llama", model_id): - tokenizer.add_special_tokens({"pad_token":""}) - 
model.resize_token_embeddings(len(tokenizer)) + print(model.hf_device_map) - half_layers = int(layers/2) + # add pad token if none, depending on model + #if tokenizer.pad_token == None: + # if re.match(r"Llama|llama", model_id): + # tokenizer.add_special_tokens({"pad_token":""}) + # model.resize_token_embeddings(len(tokenizer)) - # Create a copy of all the layers - model_layers = model.model.layers - copy_layers = [] - for i in range(half_layers): - # print(f"Copying layer {i}") - layer_to_copy = model_layers[i] - # print(layer_to_copy) + shard_layers = nn.ModuleList(model.model.layers[:half_layers])#.to("cuda") + sharded_model = OnionHuggingFaceLM(layers=shard_layers) - copy_layers.append(layer_to_copy) - - print(f"loading {len(copy_layers)} layers back to model") - - # load half layers back into model - module_copy_list = nn.ModuleList(copy_layers).to("cuda") - model.model.layers.load_state_dict( - module_copy_list.state_dict(), - strict=False - ) + print(model) # generate first half messages = [{"role": "user", "content": prompt}] @@ -66,59 +118,60 @@ async def model_split_test(prompt: str, model_id: str, layers: int): inputs = tokenizer([txt], return_tensors="pt") input_ids = inputs.input_ids.to("cuda") input_attention_mask = inputs.attention_mask.to("cuda") - fhalf_generate_ids = model.generate( - input_ids, - # attention_mask=input_attention_mask, - max_length=int(max_length/2), - output_hidden_states=True - # output_attentions=True - ).to("cuda") - - print("fhalf_generate_ids") - print(fhalf_generate_ids) - - # nptest = fhalf_generate_ids.numpy(force=True) - # print(f"nptest: {nptest}") - - # generate other half - copy_layers = [] - for i in range(half_layers, layers): - # print(f"Copying layer {i}") - layer_to_copy = model_layers[i] - # print(layer_to_copy) - - copy_layers.append(layer_to_copy) - - print(f"loading {len(copy_layers)} layers back to model") - - # load half layers back into model - module_copy_list = nn.ModuleList(copy_layers).to("cuda") - model.model.layers.load_state_dict( - module_copy_list.state_dict(), - strict=False + + # add if first layer of model check + shard_hidden_states, shard_logits = sharded_model.forward( + model=model, + input_ids=input_ids ) - # generate second half with first half - print(f"Generating from hidden layers output fhalf_generate_ids") - shalf_generate_ids = model.generate( - fhalf_generate_ids, - # attention_mask=input_attention_mask, - max_length=max_length - ).to("cuda") - - print("shalf_generate_ids") - print(shalf_generate_ids) - # print(tokenizer.eos_token_id) + print(f"shard_hidden_states\n{shard_hidden_states}") + print(f"shard_logits\n{shard_logits}") + + + # second half + print("Using first half hidden state for last half of model") + shard_layers = nn.ModuleList(model.model.layers[half_layers:]).to("cuda") + sharded_model.layers = shard_layers + sharded_model.is_last = True - # decode second half - decode = tokenizer.batch_decode( - shalf_generate_ids, - skip_special_tokens=True, - clean_up_tokenization_spaces=False - )[0] + if shard_hidden_states is not None: + # add if last layer of model or in the middle check + shard_hidden_states, shard_logits = sharded_model.forward( + model=model, + hidden_states=shard_hidden_states + ) - print("decode") - print(decode) + print(f"shard_hidden_states\n{shard_hidden_states}") + print(f"shard_logits\n{shard_logits}") + else: + print("Sharded hidden states not found, error") + raise ValueError + + + print("generate from logits") + if shard_logits is not None: + print(shard_logits.dim()) + 
#print(shard_logits[0]) + + generated_ids = sample_logits(shard_logits, 0.1, 0.95, 30) + #generated_ids = torch.argmax(shard_logits/0.7, dim=-1) + #generated_ids = model.generate(logits) + + print("generated_ids") + print(generated_ids) + + generated_text = tokenizer.batch_decode( + generated_ids, + skip_special_tokens=True, + clean_up_tokenization_spaces=False + )[0] + + print("Generated text:") + print(generated_text) + else: + print("Sharded logits missing from last layer run, error") + raise ValueError # free model from memory del model @@ -127,41 +180,41 @@ async def model_split_test(prompt: str, model_id: str, layers: int): if __name__ == "__main__": - prompt = "In a single word only, what is the last name of the current president of the USA?" - - # print("\n-------- Test TinyLlama/TinyLlama_v1.1 ----------\n") - # model_id = "TinyLlama/TinyLlama_v1.1" - # model_layers = 22 - - # asyncio.run( - # model_split_test( - # prompt=prompt, - # model_id=model_id, - # layers=model_layers - # ) - # ) - - # print("\n-------- Test meta-llama/Meta-Llama-3.1-8B ----------\n") - # model_id = "meta-llama/Meta-Llama-3.1-8B" - # model_layers = 32 - - # asyncio.run( - # model_split_test( - # prompt=prompt, - # model_id=model_id, - # layers=model_layers - # ) - # ) - - print("\n-------- Test Qwen/Qwen2-0.5B-Instruct ----------\n") - model_id = "Qwen/Qwen2-0.5B-Instruct" - model_layers = 24 - - asyncio.run( - model_split_test( - prompt=prompt, - model_id=model_id, - layers=model_layers - ) - ) + prompt = "In a single word only, what is the last name of the current president of the USA?" + + print("\n-------- Test TinyLlama/TinyLlama_v1.1 ----------\n") + model_id = "TinyLlama/TinyLlama_v1.1" + model_layers = 22 + + asyncio.run( + model_half_split_test( + prompt=prompt, + model_id=model_id, + layers=model_layers + ) + ) + + #print("\n-------- Test meta-llama/Meta-Llama-3.1-8B ----------\n") + #model_id = "meta-llama/Meta-Llama-3.1-8B" + #model_layers = 32 + + #asyncio.run( + # model_half_split_test( + # prompt=prompt, + # model_id=model_id, + # layers=model_layers + # ) + #) + + #print("\n-------- Test Qwen/Qwen2-57B-A14B-Instruct ----------\n") + #model_id = "Qwen/Qwen2-57B-A14B-Instruct" + #model_layers = 28 + + #asyncio.run( + # model_half_split_test( + # prompt=prompt, + # model_id=model_id, + # layers=model_layers + # ) + #) diff --git a/exo/inference/pytorch/test_weight_load.py b/exo/inference/pytorch/test_weight_load.py new file mode 100644 index 000000000..7eb8142f9 --- /dev/null +++ b/exo/inference/pytorch/test_weight_load.py @@ -0,0 +1,206 @@ +import torch +import torch.nn as nn +import asyncio +import gc +import json +from transformers import AutoConfig, AutoModel +from safetensors import safe_open +from typing import Tuple, Optional +import re +from exo.inference.pytorch.utils import sample_logits, top_k_sampling +from exo.api.chatgpt_api import resolve_tokenizer + +TEMP = 0.6 +TOP_K = 60 + +class OnionHuggingFaceLM(): + def __init__(self, layers, safetensor_index_file, safetensor_directory, is_last=False): + self.layers = layers + self.is_last = is_last + self.safetensor_index_file = safetensor_index_file + self.safetensor_directory = safetensor_directory + + # Load the safetensor index JSON + with open(safetensor_index_file, "r") as f: + self.index_data = json.load(f) + self.weight_map = self.index_data['weight_map'] + self.safetensors_metadata = self.index_data['safetensors_metadata'] + + def load_layer_weights(self, model, layer_index): + layer_tensors = {} + for param_name, file_name in 
self.weight_map.items(): + if param_name.startswith(f"model.layers.{layer_index}"): + file_path = f"{self.safetensor_directory}/{file_name}" + print(f"loading safetensor\n{file_path}\nfor layer\n{layer_index}") + offsets = self.safetensors_metadata[file_name]['offsets'][param_name] + dtype = self.safetensors_metadata[file_name]['dtype'] + shape = self.safetensors_metadata[file_name]['shape'] + + with safe_open(file_path, framework="pt", device="cuda") as f: + tensor = f.get_tensor_slice(offsets[0], offsets[1]) + tensor = tensor.view(shape) # Reshape to the correct shape + + layer_tensors[param_name] = tensor + + # Assign these tensors to the model's layer + for param_name, tensor in layer_tensors.items(): + param_pointer = model + param_parts = param_name.split('.') + for attr in param_parts[:-1]: + if attr.isdigit(): + attr = int(attr) + param_pointer = getattr(param_pointer, attr) + setattr(param_pointer, param_parts[-1], tensor) + + def forward( + self, + model, + input_ids: torch.tensor=None, + hidden_states: torch.tensor=None, + attention_mask: torch.tensor=None, + **kwargs + ) -> Tuple[Optional[torch.tensor], Optional[torch.tensor]]: + + base_model = model.model + + if input_ids is not None and hidden_states is not None: + print("You must either pass a hidden_state or input_ids but not both") + raise ValueError + + if input_ids is not None: + hidden_states = base_model.embed_tokens(input_ids) + position_ids = torch.arange( + 0, + input_ids.size(1), + device=input_ids.device + ).unsqueeze(0) + + if hidden_states is not None: + position_ids = torch.arange( + 0, + hidden_states.size(1), + device=hidden_states.device + ).unsqueeze(0) + + for idx, layer in enumerate(self.layers): + print(f"Loading weights for layer {idx}") + self.load_layer_weights(model, idx) # Load weights for the current layer + print(f"Processing hidden state from layer {idx}\n") + hidden_states = layer( + hidden_states, + position_ids=position_ids + )[0] + + if self.is_last: + norm_states = base_model.norm(hidden_states).to("cuda") + logits = model.lm_head(norm_states).to("cuda") + + return (None, logits) + + return (hidden_states, None) + +async def model_half_split_test( + prompt: str, + model_id: str, + layers: int, + safetensor_index_file: str, + safetensor_directory: str): + + half_layers = int(layers / 2) + + print("loading tokenizer") + tokenizer = await resolve_tokenizer(model_id) + max_length = 512 + + print("loading config and model") + config = AutoConfig.from_pretrained(model_id, local_files_only=True) + model = AutoModel.from_config(config).to("cuda") + + print(model.hf_device_map) + + shard_layers = nn.ModuleList(model.model.layers[:half_layers]) + sharded_model = OnionHuggingFaceLM( + layers=shard_layers, + safetensor_index_file=safetensor_index_file, + safetensor_directory=safetensor_directory + ) + + print(model) + + messages = [{"role": "user", "content": prompt}] + txt = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + + print(f"Generating from chat template\n{txt}") + + inputs = tokenizer([txt], return_tensors="pt") + input_ids = inputs.input_ids.to("cuda") + input_attention_mask = inputs.attention_mask.to("cuda") + + shard_hidden_states, shard_logits = sharded_model.forward( + model=model, + input_ids=input_ids + ) + + print(f"shard_hidden_states\n{shard_hidden_states}") + print(f"shard_logits\n{shard_logits}") + + print("Using first half hidden state for last half of model") + shard_layers = nn.ModuleList(model.model.layers[half_layers:]).to("cuda") 
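The loader above assumes extra offsets/dtype/shape metadata in the index file; the stock model.safetensors.index.json shipped with sharded Hugging Face checkpoints provides a weight_map from parameter name to shard file (plus a small metadata block), and tensors can be pulled by name with safe_open(...).get_tensor(...). A sketch of that variant (paths illustrative):

import json
from pathlib import Path
from safetensors import safe_open

def load_layer_tensors(checkpoint_dir: str, layer_index: int, device: str = "cpu"):
    ckpt = Path(checkpoint_dir)
    with open(ckpt / "model.safetensors.index.json") as f:
        weight_map = json.load(f)["weight_map"]          # param name -> shard file name

    prefix = f"model.layers.{layer_index}."
    by_file = {}
    for name, file_name in weight_map.items():
        if name.startswith(prefix):
            by_file.setdefault(file_name, []).append(name)   # open each shard file once

    tensors = {}
    for file_name, names in by_file.items():
        with safe_open(str(ckpt / file_name), framework="pt", device=device) as f:
            for name in names:
                tensors[name] = f.get_tensor(name)
    return tensors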
+ sharded_model.layers = shard_layers + sharded_model.is_last = True + + if shard_hidden_states is not None: + shard_hidden_states, shard_logits = sharded_model.forward( + model=model, + hidden_states=shard_hidden_states + ) + + print(f"shard_hidden_states\n{shard_hidden_states}") + print(f"shard_logits\n{shard_logits}") + else: + print("Sharded hidden states not found, error") + raise ValueError + + print("generate from logits") + if shard_logits is not None: + generated_ids = sample_logits(shard_logits, TEMP, 0.95, TOP_K) + print("generated_ids") + print(generated_ids) + + generated_text = tokenizer.batch_decode( + generated_ids, + skip_special_tokens=True, + clean_up_tokenization_spaces=False + )[0] + + print("Generated text:") + print(generated_text) + else: + print("Sharded logits missing from last layer run, error") + raise ValueError + + del model + gc.collect() + torch.cuda.empty_cache() + +if __name__ == "__main__": + prompt = "In a single word only, what is the last name of the current president of the USA?" + + print("\n-------- Test Qwen/Qwen2-7B-Instruct ----------\n") + model_id = "Qwen/Qwen2-7B-Instruct" + model_layers = 22 + + asyncio.run( + model_half_split_test( + prompt=prompt, + model_id=model_id, + layers=model_layers, + safetensor_index_file="./data/qwen2_7B_Instruct/model.safetensors.index.json", + safetensor_directory="./data/qwen2_7B_Instruct/" + ) + ) + diff --git a/exo/inference/pytorch/utils.py b/exo/inference/pytorch/utils.py new file mode 100644 index 000000000..e4062da96 --- /dev/null +++ b/exo/inference/pytorch/utils.py @@ -0,0 +1,185 @@ +import torch +from torch.nn import functional as F + +def top_k_sampling(logits, thres): + num_logits = logits.shape[-1] + val, ind = torch.topk(logits, thres, dim=-1, largest=True, sorted=True) + mask = torch.zeros_like(logits) + mask.scatter_(-1, ind, 1) + logits = logits * mask + + return logits + +def top_p_sampling(logits, thres): + sorted_logits, sorted_indices = torch.sort(logits, descending=True) + print(f"top_p_sampling sorted_logits\n{sorted_logits}\nsorted_indices {sorted_indices}") + softmax_logits = F.softmax(sorted_logits, dim=-1) + print(f"top_p_sampling\nsoftmax_logits {softmax_logits}") + cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) + print(f"top_p_sampling\n{cumulative_probs}") + + + # Remove tokens with cumulative probability above the threshold + sorted_indices_to_remove = cumulative_probs > thres + + # Shift the indices to the right to keep also the first token above the threshold + sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() + sorted_indices_to_remove[..., 0] = 0 + + # scatter sorted tensors to original indexing + indices_to_remove = sorted_indices_to_remove.scatter(dim=-1, index=sorted_indices, src=sorted_indices_to_remove) + print(f"top_p_sampling\nindicies_to_remove: {indices_to_remove}") + logits[indices_to_remove] = float('-inf') + return logits + +def sample_logits(logits, temp, top_p, top_k): + """ + Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. + + Args: + logits (torch.Tensor): The logits distribution to sample from. + temp (float): temp for scaling logits. + top_p (float): The cumulative probability threshold for nucleus sampling. + + Returns: + torch.Tensor: The selected token index. 
+ """ + # If temp is very low, just use argmax + if temp == 0: + return logits.argmax(dim=-1) + + print(f"logits {logits}") + + scaled_logits = logits/temp + + print(f"scaled_logits: {scaled_logits}") + + if 0 < top_p < 1.0: + top_p_logits = top_p_sampling(scaled_logits, top_p) + print(f"top_p logits {top_p_logits}") + if top_k > 0: + top_k_logits = top_k_sampling(top_p_logits, top_k) + return top_k_logits.argmax(dim=-1) + elif top_k > 0: + top_k_logits = top_k_sampling(logits, top_k) + print(f"top_k logits {top_k_logits}") + return top_k_logits.argmax(dim=-1) + + return scaled_logits.argmax(dim=-1) + + +# from tinygrad llama model sample +def sample(logits: torch.Tensor, temp: float, k: int, p: float, af: float, ap: float): + assert logits.ndim == 1, "only works on 1D tensors" + assert 0 <= p <= 1, "p must be between 0 and 1" + assert 0 <= k <= logits.numel(), "k must be between 0 and numel" + + # If temperature is very low, just use argmax + if temp < 1e-6: + return logits.argmax().reshape(1) + + # Alpha sampling + if af or ap: + if not hasattr(sample, "alpha_counter"): + sample.alpha_counter = torch.zeros_like(logits, dtype=torch.int32).contiguous() + logits = logits - (sample.alpha_counter * af + (sample.alpha_counter > 0).float() * ap) + + # Replace NaNs with -inf + logits = torch.where(logits != logits, torch.tensor(-float("inf"), device=logits.device), logits) + + # Apply softmax after temperature scaling + t = F.softmax(logits / temp, dim=-1) + + counter = torch.arange(t.numel(), device=logits.device).contiguous() + counter2 = torch.arange(t.numel() - 1, -1, -1, device=logits.device).contiguous() + + # Top-k sampling + if k: + output = torch.zeros(k, device=logits.device).contiguous() + output_indices = torch.zeros(k, device=logits.device, dtype=torch.int32).contiguous() + + for i in range(k): + t_max = t.max() + t_argmax = (t.numel() - ((t == t_max) * counter2).max() - 1).to(torch.int) + output[i] = t_max + output_indices[i] = t_argmax + t = torch.where(counter == t_argmax, torch.tensor(0.0, device=logits.device), t) + + # Approximate top-p sampling + output_cumsum = output.flip(dims=(0,)).cumsum(dim=0).flip(dims=(0,)) + t.sum() + mask = output_cumsum >= (1 - p) + output = output * mask.float() + output_indices = output_indices * mask.int() + + # Sample from the distribution + output_idx = output.multinomial(num_samples=1) + output_token = output_indices[output_idx] + else: + output_token = t.multinomial(num_samples=1) + + # Increase alpha counter + if af or ap: + sample.alpha_counter = torch.where(counter == output_token, sample.alpha_counter + 1, sample.alpha_counter) + + return output_token + + +def sample_3d(logits: torch.Tensor, temp: float, k: int, p: float, af: float, ap: float): + assert logits.ndim == 3, "only works on 3D tensors" + assert 0 <= p <= 1, "p must be between 0 and 1" + assert 0 <= k <= logits.shape[-1], "k must be between 0 and the last dimension size" + + batch_size, seq_len, vocab_size = logits.shape + + # If temperature is very low, just use argmax + if temp < 1e-6: + return logits.argmax(dim=-1) + + # Alpha sampling + if af or ap: + if not hasattr(sample, "alpha_counter"): + sample.alpha_counter = torch.zeros_like(logits, dtype=torch.int32).contiguous() + logits = logits - (sample.alpha_counter * af + (sample.alpha_counter > 0).float() * ap) + + # Replace NaNs with -inf + logits = torch.where(logits != logits, torch.tensor(-float("inf"), device=logits.device), logits) + + # Apply softmax after temperature scaling + t = F.softmax(logits / temp, dim=-1) + + 
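One caveat with the top_k_sampling helper above: multiplying logits by a 0/1 mask leaves excluded positions at 0, which can outrank genuinely kept (negative) logits. A common alternative is to mask with -inf before the softmax; a reference sketch only:

import torch
import torch.nn.functional as F

def sample_top_k(logits: torch.Tensor, temp: float = 0.6, k: int = 60) -> torch.Tensor:
    scaled = logits / max(temp, 1e-6)
    topk_vals, topk_idx = torch.topk(scaled, k, dim=-1)
    # everything outside the top-k gets -inf, so it vanishes after softmax
    filtered = torch.full_like(scaled, float("-inf")).scatter_(-1, topk_idx, topk_vals)
    probs = F.softmax(filtered, dim=-1)
    return torch.multinomial(probs, num_samples=1).squeeze(-1)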
counter = torch.arange(vocab_size, device=logits.device).unsqueeze(0).unsqueeze(0).expand_as(t).contiguous() + counter2 = torch.arange(vocab_size - 1, -1, -1, device=logits.device).unsqueeze(0).unsqueeze(0).expand_as(t).contiguous() + + # Top-k sampling + if k: + output = torch.zeros((batch_size, seq_len, k), device=logits.device).contiguous() + output_indices = torch.zeros((batch_size, seq_len, k), device=logits.device, dtype=torch.int32).contiguous() + + for i in range(k): + t_max, _ = t.max(dim=-1, keepdim=True) + t_argmax = (vocab_size - ((t == t_max) * counter2).max(dim=-1, keepdim=True)[0] - 1).to(torch.int) + output[:, :, i] = t_max.squeeze(-1) + output_indices[:, :, i] = t_argmax.squeeze(-1) + t = torch.where(counter == t_argmax, torch.tensor(0.0, device=logits.device), t) + + # Approximate top-p sampling + output_cumsum = output.flip(dims=(-1,)).cumsum(dim=-1).flip(dims=(-1,)) + t.sum(dim=-1, keepdim=True) + mask = output_cumsum >= (1 - p) + output = output * mask.float() + output_indices = output_indices * mask.int() + + # Sample from the distribution + output_flat = output.view(batch_size * seq_len, -1) + output_idx = output_flat.multinomial(num_samples=1).squeeze(-1) + output_indices_flat = output_indices.view(batch_size * seq_len, -1) + output_token = output_indices_flat.gather(dim=-1, index=output_idx.unsqueeze(-1)).view(batch_size, seq_len) + else: + output_flat = t.view(batch_size * seq_len, -1) + output_token = output_flat.multinomial(num_samples=1).view(batch_size, seq_len) + + # Increase alpha counter + if af or ap: + sample.alpha_counter = torch.where(counter == output_token.unsqueeze(-1), sample.alpha_counter + 1, sample.alpha_counter) + + return output_token + From d142be047ef3f47b516f4badfc0e03b07673f8dc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 13 Sep 2024 11:10:56 -0800 Subject: [PATCH 330/589] adding more testing, refining logit selection --- exo/inference/pytorch/model/hf.py | 13 +- exo/inference/pytorch/test_simple_model.py | 6 +- exo/inference/pytorch/test_split_model.py | 339 +++++++++++++-------- 3 files changed, 224 insertions(+), 134 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 2a5eefd33..7ef806652 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -44,21 +44,18 @@ def __init__(self, shard: Shard, ): print(f"error loading and splitting model: {err}") raise - def forward( + def run( self, input_ids: torch.tensor ) -> Tuple[np.ndarray, any]: """ - Forward through layers using the base model + Run through a set of model layers Args: input_ids: tensor input + this could be tokens or hidden states from other layers Returns: - generator_ids: token ids from generation + layer_outputs: dict + layer output including hidden states, key values or logits """ - - generate_ids = self.base_model.generate( - input_ids, - - ) \ No newline at end of file diff --git a/exo/inference/pytorch/test_simple_model.py b/exo/inference/pytorch/test_simple_model.py index 81009d08e..1b08a1801 100644 --- a/exo/inference/pytorch/test_simple_model.py +++ b/exo/inference/pytorch/test_simple_model.py @@ -21,12 +21,16 @@ ) model_inputs = tokenizer([text], return_tensors="pt").to(device) +print(f"model_inputs:\n{model_inputs}") + +print(f"generation_config:\n{model.generation_config}") + generated_ids = model.generate( model_inputs.input_ids, attention_mask=model_inputs.attention_mask, max_new_tokens=512, do_sample=True, - top_k=20 + #top_k=20, #num_beams=5, #early_stopping=True ) diff 
--git a/exo/inference/pytorch/test_split_model.py b/exo/inference/pytorch/test_split_model.py index 242e5f484..d5ceb755d 100644 --- a/exo/inference/pytorch/test_split_model.py +++ b/exo/inference/pytorch/test_split_model.py @@ -2,7 +2,28 @@ import torch.nn as nn import asyncio import gc -from transformers import AutoModelForCausalLM, AutoConfig, Qwen2ForCausalLM +from transformers import ( + AutoModel, + AutoModelForCausalLM, + AutoTokenizer, + DynamicCache, + Cache, + LogitsProcessorList, + #MinLengthLogitsProcessor, + LogitsWarper, + TopKLogitsWarper, + TopPLogitsWarper, + TemperatureLogitsWarper, + StoppingCriteriaList, + MaxLengthCriteria, + MaxTimeCriteria +) + +from transformers.generation.configuration_utils import ( + GenerationConfig, + GenerationMode +) + from exo.api.chatgpt_api import resolve_tokenizer from typing import Tuple, Optional import re @@ -19,50 +40,94 @@ def __init__(self, layers, is_last=False): def forward( self, model, - input_ids: torch.tensor=None, - hidden_states: torch.tensor=None, + llm_model, + input_ids: Optional[torch.tensor], + hidden_states: Optional[torch.tensor], attention_mask: torch.tensor=None, + past_key_values: Cache=DynamicCache(), **kwargs - ) -> Tuple[Optional[torch.tensor], Optional[torch.tensor]]: + ) -> Tuple[Optional[torch.tensor], Optional[torch.tensor], Optional[Cache]]: + + """ + Generate hidden states or logits via passing through set amount of layers of a model + To be passed only input_ids OR hidden_state and not both. This is for connecting the model + layer to generate a complete output + + Args: + input_ids: tensor Optional + hidden_states: tensor Optional - # set base model - base_model = model.model + Returns: + Tuple of + - hidden_states: tensor Optional + - logits: tensor Optional + + """ + is_first = False if input_ids is not None and hidden_states is not None: - print("You must either pass a hidden_state or input_ids but not both") - assert ValueError + raise ValueError if input_ids is not None: - # embed - hidden_states = base_model.embed_tokens(input_ids) - position_ids = torch.arange( - 0, - input_ids.size(1), - device=input_ids.device - ).unsqueeze(0) + # embed input_ids + input_ids = model.embed_tokens(input_ids) + # calculate position_ids + batch_size, seq_length = input_ids.shape[:2] + + is_first = True if hidden_states is not None: - hidden_states = hidden_states - position_ids = torch.arange( - 0, - hidden_states.size(1), - device=hidden_states.device - ).unsqueeze(0) + batch_size, seq_length = hidden_states.shape[:2] + + # cache + past_key_values_length = len(past_key_values) + cache_position = torch.arange( + past_key_values_length, + seq_length + past_key_values_length, + dtype=torch.long, + device=input_ids.device if input_ids is not None else hidden_states.device + ) + + position_ids = cache_position.unsqueeze(0) + + if is_first: + model_inputs = llm_model.prepare_inputs_for_generation( + input_ids, + past_key_values=past_key_values, + position_ids=position_ids, + cache_position=cache_position, + attention_mask=attention_mask + ) + + print(f"model_inputs\n{model_inputs}") + for layer in self.layers: - print(f"Processing hidden state from layer\n{layer}\n") - hidden_states = layer( - hidden_states, - position_ids=position_ids - )[0] + layer_input = input_ids if input_ids is not None else hidden_states + #print(f"INPUT: \n{layer_input}\n") + #print(f"POSITION_IDS: \n{position_ids}\n") + #print(f"LAYER: \n{layer}\n") + layer_outputs = layer( + model_inputs["input_ids"], + position_ids=model_inputs["position_ids"], 
+ #attention_mask=model_inputs["attention_mask"], + past_key_values=model_inputs["past_key_values"], + return_dict=True, + use_cache=True + ) + + hidden_states = layer_outputs[0] + past_key_values = layer_outputs[1] if self.is_last: - norm_states = base_model.norm(hidden_states).to("cuda") - logits = model.lm_head(norm_states).to("cuda") + norm_states = model.norm(hidden_states) + + # lm_head + logits = llm_model.lm_head(norm_states).to("cuda") - return (None, logits) + return (None, logits, past_key_values) - return (hidden_states, None) + return (hidden_states, None, past_key_values) async def model_half_split_test(prompt: str, model_id: str, layers: int): """ @@ -72,40 +137,28 @@ async def model_half_split_test(prompt: str, model_id: str, layers: int): half_layers = int(layers / 2) # inference - tokenizer = await resolve_tokenizer(model_id) + tokenizer = AutoTokenizer.from_pretrained(model_id) max_length = 512 #tokenizer.model_max_length - # get full model - if re.match(r"^Qwen|qwen", model_id): - model = Qwen2ForCausalLM.from_pretrained( - model_id, - torch_dtype="auto", - device_map="auto", - # attn_implementation="eager" - # low_cpu_mem_usage=True - ) - else: - model = AutoModelForCausalLM.from_pretrained( - model_id, - torch_dtype="auto", - device_map="auto", - # low_cpu_mem_usage=True - ) - - print(model.hf_device_map) + # get llm model + llm_model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype="auto", + device_map="auto", + use_cache=True + ) + + # get base model + model = llm_model.model # add pad token if none, depending on model - #if tokenizer.pad_token == None: - # if re.match(r"Llama|llama", model_id): - # tokenizer.add_special_tokens({"pad_token":""}) - # model.resize_token_embeddings(len(tokenizer)) + if tokenizer.pad_token == None: + if re.match(r"Llama|llama", model_id): + tokenizer.add_special_tokens({"pad_token":""}) + model.resize_token_embeddings(len(tokenizer)) - shard_layers = nn.ModuleList(model.model.layers[:half_layers])#.to("cuda") - sharded_model = OnionHuggingFaceLM(layers=shard_layers) - - print(model) - - # generate first half + + # generate input_ids messages = [{"role": "user", "content": prompt}] txt = tokenizer.apply_chat_template( messages, @@ -113,65 +166,100 @@ async def model_half_split_test(prompt: str, model_id: str, layers: int): add_generation_prompt=True ) - print(f"Generating from chat template\n{txt}") - inputs = tokenizer([txt], return_tensors="pt") input_ids = inputs.input_ids.to("cuda") - input_attention_mask = inputs.attention_mask.to("cuda") + input_attention_mask = inputs.attention_mask.to("cuda") + batch_size, seq_length = input_ids.shape[:2] + + is_finished = False + unfinished_sequences = torch.ones(batch_size, dtype=torch.long, device=input_ids.device) + logit_runs = 1 - # add if first layer of model check - shard_hidden_states, shard_logits = sharded_model.forward( - model=model, - input_ids=input_ids - ) + raw_logits = None - print(f"shard_hidden_states\n{shard_hidden_states}") - print(f"shard_logits\n{shard_logits}") + while not is_finished: + print(f"\n\nLOGIT RUN {logit_runs}\n\n") + print(f"input_ids:\n{input_ids}\n") + print(input_ids.shape) - # second half - print("Using first half hidden state for last half of model") - shard_layers = nn.ModuleList(model.model.layers[half_layers:]).to("cuda") - sharded_model.layers = shard_layers - sharded_model.is_last = True + #shard_layers = nn.ModuleList(model.layers[:half_layers])#.to("cuda") + shard_layers = nn.ModuleList(model.layers) + sharded_model = 
OnionHuggingFaceLM(layers=shard_layers) + sharded_model.is_last = True - if shard_hidden_states is not None: - # add if last layer of model or in the middle check - shard_hidden_states, shard_logits = sharded_model.forward( + # generate first half + # add if first layer of model check + shard_hidden_states, shard_logits, shard_past_kvs = sharded_model.forward( model=model, - hidden_states=shard_hidden_states + llm_model=llm_model, + attention_mask=input_attention_mask, + input_ids=input_ids, + hidden_states=None ) - print(f"shard_hidden_states\n{shard_hidden_states}") - print(f"shard_logits\n{shard_logits}") - else: - print("Sharded hidden states not found, error") - raise ValueError - + # second half + #sharded_model.layers = nn.ModuleList(model.layers[half_layers:]) + #sharded_model.is_last = True - print("generate from logits") - if shard_logits is not None: - print(shard_logits.dim()) - #print(shard_logits[0]) + #shard_hidden_states, shard_logits, shard_past_kvs = sharded_model.forward( + # model=model, + # llm_model=llm_model, + # input_ids=None, + # hidden_states=shard_hidden_states, + # past_key_values=shard_past_kvs + #) - generated_ids = sample_logits(shard_logits, 0.1, 0.95, 30) - #generated_ids = torch.argmax(shard_logits/0.7, dim=-1) - #generated_ids = model.generate(logits) + # this part of the generation and _sample functions for transformers GenerationMixin + # ref: https://github.com/huggingface/transformers/blob/0a55d9f7376f72ad3ff296d4249840021b03bcc4/src/transformers/generation/utils.py#L1301 - print("generated_ids") - print(generated_ids) + # clone logit sample + logits = shard_logits[:, -1, :].clone().float() + + raw_logits = logits + + # distribute + logits_processor = LogitsProcessorList([ + TopKLogitsWarper(35), + TemperatureLogitsWarper(0.6), + TopPLogitsWarper(0.8) + ]) + + stopping_critera = StoppingCriteriaList( + [ + MaxLengthCriteria(max_length=50), + MaxTimeCriteria(max_time=10.0), + ] + ) + + next_token_scores = logits_processor(input_ids, logits) + + probs = nn.functional.softmax(next_token_scores, dim=-1) + next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1) + #next_tokens = torch.argmax(next_token_scores, dim=-1) + + # get inputs ready incase not finished + input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) + + unfinished_sequences = unfinished_sequences & ~stopping_critera(input_ids, None) + is_finished = unfinished_sequences.max() == 0 + + print(f"is_finished?:\n{is_finished}\n") + + logit_runs += 1 + + del logits + del shard_logits - generated_text = tokenizer.batch_decode( - generated_ids, - skip_special_tokens=True, - clean_up_tokenization_spaces=False - )[0] + print(f"model.generation_config\n{llm_model.generation_config}") - print("Generated text:") - print(generated_text) - else: - print("Sharded logits missing from last layer run, error") - raise ValueError + generated_text = tokenizer.batch_decode( + input_ids, + skip_special_tokens=True, + clean_up_tokenization_spaces=False + )[0] + + print(f"generated_text:\n{generated_text}\n") # free model from memory del model @@ -180,19 +268,20 @@ async def model_half_split_test(prompt: str, model_id: str, layers: int): if __name__ == "__main__": - prompt = "In a single word only, what is the last name of the current president of the USA?" + #prompt = "In a single word only, what is the last name of the current president of the USA?" + prompt = "In a single word only, what is the color of an apple?" 
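The sampling loop in this test re-feeds the full, growing input_ids on every iteration. With the KV cache carried between steps, only the newly sampled token needs to go through the model; a self-contained sketch using the plain transformers API (greedy decoding for brevity, separate from the sharded path above):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache

def cached_decode(model_id: str, prompt: str, max_new_tokens: int = 50) -> str:
    tok = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
    ids = tok(prompt, return_tensors="pt").input_ids.to(model.device)
    cache = DynamicCache()
    generated, step_input = ids, ids
    for _ in range(max_new_tokens):
        out = model(input_ids=step_input, past_key_values=cache, use_cache=True)
        cache = out.past_key_values
        next_token = out.logits[:, -1, :].argmax(dim=-1, keepdim=True)
        generated = torch.cat([generated, next_token], dim=-1)
        if next_token.item() == tok.eos_token_id:
            break
        step_input = next_token        # only the new token is fed on the next step
    return tok.decode(generated[0], skip_special_tokens=True)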
- print("\n-------- Test TinyLlama/TinyLlama_v1.1 ----------\n") - model_id = "TinyLlama/TinyLlama_v1.1" - model_layers = 22 + #print("\n-------- Test TinyLlama/TinyLlama_v1.1 ----------\n") + #model_id = "TinyLlama/TinyLlama_v1.1" + #model_layers = 22 - asyncio.run( - model_half_split_test( - prompt=prompt, - model_id=model_id, - layers=model_layers - ) - ) + #asyncio.run( + # model_half_split_test( + # prompt=prompt, + # model_id=model_id, + # layers=model_layers + # ) + #) #print("\n-------- Test meta-llama/Meta-Llama-3.1-8B ----------\n") #model_id = "meta-llama/Meta-Llama-3.1-8B" @@ -206,15 +295,15 @@ async def model_half_split_test(prompt: str, model_id: str, layers: int): # ) #) - #print("\n-------- Test Qwen/Qwen2-57B-A14B-Instruct ----------\n") - #model_id = "Qwen/Qwen2-57B-A14B-Instruct" - #model_layers = 28 - - #asyncio.run( - # model_half_split_test( - # prompt=prompt, - # model_id=model_id, - # layers=model_layers - # ) - #) + print("\n-------- Test Qwen/Qwen2-0.5B-Instruct ----------\n") + model_id = "Qwen/Qwen2-0.5B-Instruct" + model_layers = 24 + + asyncio.run( + model_half_split_test( + prompt=prompt, + model_id=model_id, + layers=model_layers + ) + ) From be8d7fbaf6f2a3cb200c15a22e4ac06701ed6506 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 15 Sep 2024 00:19:16 -0800 Subject: [PATCH 331/589] working split model test, updating class --- exo/inference/pytorch/test_split_model.py | 182 ++++++++++++++-------- 1 file changed, 121 insertions(+), 61 deletions(-) diff --git a/exo/inference/pytorch/test_split_model.py b/exo/inference/pytorch/test_split_model.py index d5ceb755d..42e1642ab 100644 --- a/exo/inference/pytorch/test_split_model.py +++ b/exo/inference/pytorch/test_split_model.py @@ -24,8 +24,14 @@ GenerationMode ) +# llama +from transformers.models.llama.modeling_llama import LlamaModel + +# qwen2 +from transformers.models.qwen2.modeling_qwen2 import Qwen2Model + from exo.api.chatgpt_api import resolve_tokenizer -from typing import Tuple, Optional +from typing import Tuple, Optional, Union, List import re from exo.inference.pytorch.utils import sample_logits, top_k_sampling @@ -36,17 +42,27 @@ class OnionHuggingFaceLM(): def __init__(self, layers, is_last=False): self.layers = layers self.is_last = is_last + self.past_key_values = None + self.cache_position = None + self.position_ids = None + self.input_embed = None + self.causal_mask = None + self.position_embeddings = None + self.attention_mask = None + self.input_ids = None + self.hidden_states = None + self.next_decoder_cache = None def forward( self, model, llm_model, - input_ids: Optional[torch.tensor], - hidden_states: Optional[torch.tensor], - attention_mask: torch.tensor=None, - past_key_values: Cache=DynamicCache(), + input_ids: Optional[torch.tensor] = None, + hidden_states: Optional[torch.tensor] = None, + attention_mask: Optional[torch.tensor] = None, + past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, **kwargs - ) -> Tuple[Optional[torch.tensor], Optional[torch.tensor], Optional[Cache]]: + ) -> Tuple[Optional[torch.tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.tensor]]: """ Generate hidden states or logits via passing through set amount of layers of a model @@ -54,80 +70,123 @@ def forward( layer to generate a complete output Args: + model: base llm model tramsformers class + llm_model: llm chat model class input_ids: tensor Optional hidden_states: tensor Optional Returns: Tuple of - hidden_states: tensor Optional + - past_key_values - logits: 
tensor Optional """ - is_first = False + output_attentions = False # outputting attention not needed + use_legacy_cache = False # some models still use legacy kv store if input_ids is not None and hidden_states is not None: raise ValueError - if input_ids is not None: - # embed input_ids - input_ids = model.embed_tokens(input_ids) - # calculate position_ids - batch_size, seq_length = input_ids.shape[:2] + if hidden_states is not None: + self.hidden_states = hidden_states - is_first = True + if input_ids is not None: + self.input_ids = input_ids - if hidden_states is not None: - batch_size, seq_length = hidden_states.shape[:2] + # embed input_ids + self.inputs_embeds = model.embed_tokens(self.input_ids) - # cache - past_key_values_length = len(past_key_values) - cache_position = torch.arange( - past_key_values_length, - seq_length + past_key_values_length, - dtype=torch.long, - device=input_ids.device if input_ids is not None else hidden_states.device - ) + # cache + if past_key_values and not isinstance(past_key_values, Cache): + print("Using legacy cache") + use_legacy_cache = True + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + + past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + self.inputs_embeds.shape[1], + device=self.inputs_embeds.device + ) + + # position id + position_ids = cache_position.unsqueeze(0) + + # causal mask + self.attention_mask = attention_mask + self.causal_mask = model._update_causal_mask( + None, + self.inputs_embeds, + cache_position, + past_key_values, + output_attentions + ) + + #print(f"causal_mask.dim(): {self.causal_mask.dim()}") - position_ids = cache_position.unsqueeze(0) + print(f"\ncausal_mask:{self.causal_mask}\n\n") - if is_first: + # embed positions, some models require and some dont + if isinstance(model, LlamaModel): + self.position_embeddings = model.rotary_emb( + self.inputs_embeds, + position_ids + ) + model_inputs = llm_model.prepare_inputs_for_generation( - input_ids, + self.input_ids, past_key_values=past_key_values, + attention_mask=self.attention_mask, + inputs_embeds=self.inputs_embeds, position_ids=position_ids, - cache_position=cache_position, - attention_mask=attention_mask + cache_position=cache_position ) print(f"model_inputs\n{model_inputs}") + self.hidden_states = self.inputs_embeds + self.position_ids = model_inputs["position_ids"] + self.cache_position = model_inputs["cache_position"] + self.past_key_values = model_inputs["past_key_values"] + + + for decoder_layer in self.layers: + layer_outputs = decoder_layer( + self.hidden_states, + attention_mask=self.causal_mask, + position_ids=self.position_ids, + past_key_values=self.past_key_values, + use_cache=True, + cache_position=self.cache_position - for layer in self.layers: - layer_input = input_ids if input_ids is not None else hidden_states - #print(f"INPUT: \n{layer_input}\n") - #print(f"POSITION_IDS: \n{position_ids}\n") - #print(f"LAYER: \n{layer}\n") - layer_outputs = layer( - model_inputs["input_ids"], - position_ids=model_inputs["position_ids"], - #attention_mask=model_inputs["attention_mask"], - past_key_values=model_inputs["past_key_values"], - return_dict=True, - use_cache=True ) - hidden_states = layer_outputs[0] - past_key_values = layer_outputs[1] + self.hidden_states = layer_outputs[0] + self.next_decoder_cache = layer_outputs[1] if self.is_last: - norm_states = model.norm(hidden_states) + self.hidden_states = 
model.norm(self.hidden_states) + + if use_legacy_cache: + self.past_key_values = self.next_decoder_cache.to_legacy_cache() + else: + self.past_key_values = self.next_decoder_cache # lm_head - logits = llm_model.lm_head(norm_states).to("cuda") + logits = llm_model.lm_head(self.hidden_states).to("cuda") - return (None, logits, past_key_values) + return ( + None, + None, + logits + ) - return (hidden_states, None, past_key_values) + return ( + self.hidden_states, + self.past_key_values, + None + ) async def model_half_split_test(prompt: str, model_id: str, layers: int): """ @@ -183,14 +242,15 @@ async def model_half_split_test(prompt: str, model_id: str, layers: int): print(f"input_ids:\n{input_ids}\n") print(input_ids.shape) - #shard_layers = nn.ModuleList(model.layers[:half_layers])#.to("cuda") - shard_layers = nn.ModuleList(model.layers) + print("\n first half of layers") + shard_layers = nn.ModuleList(model.layers[:half_layers])#.to("cuda") + #shard_layers = nn.ModuleList(model.layers) sharded_model = OnionHuggingFaceLM(layers=shard_layers) - sharded_model.is_last = True + #sharded_model.is_last = True # generate first half # add if first layer of model check - shard_hidden_states, shard_logits, shard_past_kvs = sharded_model.forward( + shard_hidden_states, shard_past_kvs, shard_logits = sharded_model.forward( model=model, llm_model=llm_model, attention_mask=input_attention_mask, @@ -199,16 +259,16 @@ async def model_half_split_test(prompt: str, model_id: str, layers: int): ) # second half - #sharded_model.layers = nn.ModuleList(model.layers[half_layers:]) - #sharded_model.is_last = True + print(f"\n second half of layers") + sharded_model.layers = nn.ModuleList(model.layers[half_layers:]) + sharded_model.is_last = True - #shard_hidden_states, shard_logits, shard_past_kvs = sharded_model.forward( - # model=model, - # llm_model=llm_model, - # input_ids=None, - # hidden_states=shard_hidden_states, - # past_key_values=shard_past_kvs - #) + shard_hidden_states, shard_past_kvs, shard_logits = sharded_model.forward( + model=model, + llm_model=llm_model, + hidden_states=shard_hidden_states, + past_key_values=shard_past_kvs + ) # this part of the generation and _sample functions for transformers GenerationMixin # ref: https://github.com/huggingface/transformers/blob/0a55d9f7376f72ad3ff296d4249840021b03bcc4/src/transformers/generation/utils.py#L1301 @@ -268,8 +328,8 @@ async def model_half_split_test(prompt: str, model_id: str, layers: int): if __name__ == "__main__": - #prompt = "In a single word only, what is the last name of the current president of the USA?" - prompt = "In a single word only, what is the color of an apple?" + prompt = "In a single word only, what is the last name of the current president of the USA?" + #prompt = "In a single word only, what is the color of an apple?" 
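# A toy illustration of the two-pass flow tested above: run the first half of
# the layer stack to get hidden states, then feed them through the second half.
# Plain nn.Linear layers stand in for the HF decoder layers so this runs on its
# own; the real test also threads attention masks and KV caches through.
import torch
import torch.nn as nn

layers = nn.ModuleList([nn.Linear(16, 16) for _ in range(8)])
half_layers = len(layers) // 2

def run_shard(shard_layers: nn.ModuleList, hidden_states: torch.Tensor) -> torch.Tensor:
    for layer in shard_layers:
        hidden_states = layer(hidden_states)
    return hidden_states

hidden_states = torch.randn(1, 4, 16)                            # pretend embeddings
hidden_states = run_shard(layers[:half_layers], hidden_states)   # "first half" of the model
output = run_shard(layers[half_layers:], hidden_states)          # "second half" produces the output
print(output.shape)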
#print("\n-------- Test TinyLlama/TinyLlama_v1.1 ----------\n") #model_id = "TinyLlama/TinyLlama_v1.1" From 9d1ecdd60f90bd37f5be426199b7a1b7e24dd4fc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 15 Sep 2024 05:07:39 -0800 Subject: [PATCH 332/589] working on class and inference engine updates --- exo/inference/pytorch/inference.py | 49 ++++--- exo/inference/pytorch/model/hf.py | 206 ++++++++++++++++++++++++----- 2 files changed, 206 insertions(+), 49 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 9334153c6..a1df79669 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -13,6 +13,11 @@ from accelerate import disk_offload from exo.download.shard_download import ShardDownloader +# model value options +TOP_K = 35 +TEMP = 0.6 +TOP_P = 0.8 + class PyTorchDynamicShardInferenceEngine(InferenceEngine): """ PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. @@ -29,7 +34,20 @@ def __init__(self, shard_downloader: ShardDownloader): self.shard_downloader = shard_downloader self.stateful_sharded_model = None self.tokenizer = None - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # setup cuda device + if torch.cuda.is_available(): + self.device = torch.device("cuda") + self.torch_dtype = torch.float32 + elif torch.backends.mps.is_available(): + self.device = torch.device("mps") + self.torch_dtype = torch.float32 + else: + self.device = torch.device("cpu") + self.torch_dtype = torch.float16 + + # setup unfinished sequence + self.unfinished_sequences = torch.ones(batch_size, dtype=torch.long, device=self.device) async def infer_prompt( self, @@ -41,25 +59,24 @@ async def infer_prompt( ) -> Tuple[np.ndarray, str, bool]: if DEBUG >= 4: print("infer_prompt called") - print(f"prompt: {prompt}") await self.ensure_shard(shard) - # need to make this so inference_state is not a string - # cant use it with dynamic cache - - inputs = self.tokenizer(prompt, return_tensors="pt") - input_ids = inputs.input_ids.to(self.device) - - # add pad token if none - if self.tokenizer.pad_token == None: - self.tokenizer.add_special_tokens({"pad_token":""}) - self.stateful_sharded_model.base_model.resize_token_embeddings(len(self.tokenizer)) - - current_kvs = None + # setup prompt input + messages = [{"role": "user", "content": prompt}] + txt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + inputs = self.tokenizer([txt], return_tensors="pt") + input_ids = inputs.input_ids.to("cuda") + input_attention_mask = inputs.attention_mask.to("cuda") + batch_size, seq_length = input_ids.shape[:2] + if DEBUG >= 4: - print(f"tokens: {input_ids}\n") + print(f"input_ids: {input_ids}\n") print(f"layer_count: {self.shard.get_layer_count()}") print(f"is_first_layer: {self.shard.is_first_layer()}") print(f"is_last_layer: {self.shard.is_last_layer()}") @@ -193,4 +210,4 @@ async def ensure_shard(self, shard: Optional[Shard]): self.shard = shard if DEBUG >= 4: - print(f"Shard loaded successfully: {shard}") \ No newline at end of file + print(f"Shard loaded successfully: {shard}") diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 7ef806652..2805ae67c 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,61 +1,201 @@ import torch import torch.nn as nn import numpy as np -from transformers import AutoModelForCausalLM +import gc +from typing import Tuple, 
Optional, Union, List + from exo.inference.shard import Shard from exo.helpers import DEBUG from exo.inference.inference_engine import InferenceEngine from exo.download.shard_download import ShardDownloader -from typing import Tuple, Optional, Union, List + +from transformers import ( + AutoModel, + AutoModelForCausalLM, + AutoTokenizer, + DynamicCache, + Cache, + LogitsProcessorList, + #MinLengthLogitsProcessor, + LogitsWarper, + TopKLogitsWarper, + TopPLogitsWarper, + TemperatureLogitsWarper, + StoppingCriteriaList, + MaxLengthCriteria, + MaxTimeCriteria +) + +from transformers.generation.configuration_utils import ( + GenerationConfig, + GenerationMode +) + +# llama +from transformers.models.llama.modeling_llama import LlamaModel + +# qwen2 +from transformers.models.qwen2.modeling_qwen2 import Qwen2Model + class ShardedHuggingFaceModel(InferenceEngine): def __init__(self, shard: Shard, ): + # class vars self.shard = shard + self.hidden_states = None + self.input_ids = None + self.inputs_embeds = None + self.attention_mask = None + self.position_embeddings = None + self.past_key_values = None + self.cache_position = None + self.position_ids = None + self.causal_mask = None - if torch.cuda.is_available(): - self.device = torch.device("cuda") - self.torch_dtype = torch.float32 - elif torch.backends.mps.is_available(): - self.device = torch.device("mps") - self.torch_dtype = torch.float32 - else: - self.device = torch.device("cpu") - self.torch_dtype = torch.float16 - + # setup pytorch and transformer llm try: self.base_model = AutoModelForCausalLM.from_pretrained( shard.model_id, torch_dtype=self.torch_dtype, device_map="auto" - ) - - # build layers from shard - layers = self.base_model.model.layers - copy_layers = nn.ModuleList( - [layers[i] for i in range(self.shard.start_layer, self.shard.end_layer + 1)] - ) - - # apply layers back to model - self.base_model.model.layers.load_state_dict( - copy_layers.state_dict(), - strict=False - ) + ) except Exception as err: print(f"error loading and splitting model: {err}") raise - def run( + + def forward( self, - input_ids: torch.tensor - ) -> Tuple[np.ndarray, any]: + shard: Optional[Shard] = None, + model, + llm_model, + input_ids: Optional[torch.tensor] = None, + hidden_states: Optional[torch.tensor] = None, + attention_mask: Optional[torch.tensor] = None, + past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + use_legacy_cache: Optional[bool] = False + ) -> Tuple[Optional[torch.tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.tensor]]: + """ - Run through a set of model layers + Generate hidden states or logits via passing through set amount of layers of a model + To be passed only input_ids OR hidden_state and not both. 
This is for connecting the model + layer to generate a complete output Args: - input_ids: tensor input - this could be tokens or hidden states from other layers + model: base llm model tramsformers class + llm_model: llm chat model class + input_ids: tensor optional + hidden_states: tensor optional + attention_mask: tensor optional + past_key_values: Cache or list[tensor] optional + use_legacy_cache: bool optional Returns: - layer_outputs: dict - layer output including hidden states, key values or logits + Tuple of + - hidden_states: tensor optional + - past_key_values: Cache or list[tensor] optional + - logits: tensor Optional + """ + + if input_ids is not None and hidden_states is not None: + raise ValueError + + if hidden_states is not None: + self.hidden_states = hidden_states + + if input_ids is not None: + self.input_ids = input_ids + + # embed input_ids + self.inputs_embeds = model.embed_tokens(self.input_ids) + + # cache + if past_key_values and not isinstance(past_key_values, Cache): + print("Using legacy cache") + use_legacy_cache = True + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + + past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + self.inputs_embeds.shape[1], + device=self.inputs_embeds.device + ) + + # position id + position_ids = cache_position.unsqueeze(0) + + # casual mask and attention_mask + self.attention_mask = attention_mask + self.causal_mask = model._update_causal_mask( + None, + self.inputs_embeds, + cache_position, + past_key_values, + output_attentions + ) + + # embed positions, some models require and some dont + if isinstance(model, LlamaModel): + self.position_embeddings = model.rotary_emb( + self.inputs_embeds, + position_ids + ) + + # prepare inputs for decoder layers + model_inputs = llm_model.prepare_inputs_for_generation( + self.input_ids, + past_key_values=past_key_values, + attention_mask=self.attention_mask, + inputs_embeds=self.inputs_embeds, + position_ids=position_ids, + cache_position=cache_position + ) + + self.hidden_states = self.inputs_embeds + self.position_ids = model_inputs["position_ids"] + self.cache_position = model_inputs["cache_position"] + self.past_key_values = model_inputs["past_key_values"] + + # run through decoder layers + layer_amt = range(self.shard.start_layer, self.shard.end_layer + 1) + for i in layer_amt: + decoder_layer = self.model.layers[i] + layer_outputs = decoder_layer( + self.hidden_states, + attention_mask=self.causal_mask, + position_ids=self.position_ids, + past_key_values=self.past_key_values, + use_cache=True, + cache_position=self.cache_position + ) + + self.hidden_states = layer_outputs[0] + self.next_decoder_cache = layer_outputs[1] + + + # handle last layer to get logits + if self.is_last: + self.hidden_states = model.norm(self.hidden_states) + + if use_legacy_cache: + self.past_key_values = self.next_decoder_cache.to_legacy_cache() + else: + self.past_key_values = self.next_decoder_cache + + # lm_head + logits = llm_model.lm_head(self.hidden_states).to(self.device) + + return ( + None, + None, + logits + ) + + return ( + self.hidden_states, + self.past_key_values, + None + ) + From 4b0df06dabaa345e238006c8af29483ac64244a5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 15 Sep 2024 07:23:16 -0800 Subject: [PATCH 333/589] building out inference engine test --- exo/inference/pytorch/inference.py | 131 +++++++----------- exo/inference/pytorch/model/hf.py | 90 
+++++++++--- exo/inference/pytorch/tests/__init__.py | 0 .../{ => tests}/test_inference_engine.py | 0 .../{ => tests}/test_inference_loop.py | 40 +++--- .../pytorch/{ => tests}/test_simple_model.py | 0 .../pytorch/{ => tests}/test_split_model.py | 1 - .../pytorch/{ => tests}/test_weight_load.py | 0 exo/inference/pytorch/{ => tests}/utils.py | 0 9 files changed, 139 insertions(+), 123 deletions(-) create mode 100644 exo/inference/pytorch/tests/__init__.py rename exo/inference/pytorch/{ => tests}/test_inference_engine.py (100%) rename exo/inference/pytorch/{ => tests}/test_inference_loop.py (77%) rename exo/inference/pytorch/{ => tests}/test_simple_model.py (100%) rename exo/inference/pytorch/{ => tests}/test_split_model.py (99%) rename exo/inference/pytorch/{ => tests}/test_weight_load.py (100%) rename exo/inference/pytorch/{ => tests}/utils.py (100%) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index a1df79669..3bb7afd72 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,12 +1,11 @@ # experimental, based off of tinygrad/inference.py import numpy as np import torch -import numpy as np import json from typing import Optional, Tuple from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine -from exo.inference.pytorch.model.archive.hf_manual import ShardedHuggingFaceModel +from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel from exo.api.chatgpt_api import resolve_tokenizer from exo.helpers import DEBUG from transformers import DynamicCache @@ -47,7 +46,7 @@ def __init__(self, shard_downloader: ShardDownloader): self.torch_dtype = torch.float16 # setup unfinished sequence - self.unfinished_sequences = torch.ones(batch_size, dtype=torch.long, device=self.device) + self.unfinished_sequences = torch.ones(1, dtype=torch.long, device=self.device) async def infer_prompt( self, @@ -71,41 +70,42 @@ async def infer_prompt( ) inputs = self.tokenizer([txt], return_tensors="pt") - input_ids = inputs.input_ids.to("cuda") - input_attention_mask = inputs.attention_mask.to("cuda") + input_ids = inputs.input_ids.to(self.device) + input_attention_mask = inputs.attention_mask.to(self.device) batch_size, seq_length = input_ids.shape[:2] - + + if DEBUG >= 4: print(f"input_ids: {input_ids}\n") - print(f"layer_count: {self.shard.get_layer_count()}") - print(f"is_first_layer: {self.shard.is_first_layer()}") - print(f"is_last_layer: {self.shard.is_last_layer()}") - output_data = self.stateful_sharded_model.forward( - input_ids + shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( + input_ids=input_ids, + attention_mask=input_attention_mask ) - is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] - if DEBUG >= 4: - print(f"output_data: {output_data}\n") - print(f"output_data.size {output_data.size}\n") - - print(f"finished: {is_finished}") - print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") - print(f"output_data[-1] {output_data[-1]}") + print(f"\nshard_hidden_states: {shard_hidden_states}\n") + print(f"\nshard_past_kvs {shard_past_kvs}\n") + print(f"\nshard_logits: {shard_logits}") - if output_data.size == 1: - print(f"size 1 output_data.item() {output_data.item()}") - print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") + if shard_logits is not None: + input_ids = self.stateful_sharded_model.logits_sample(input_ids, 
shard_logits) + print(input_ids) + + if shard_past_kvs is not None: + cache_dict = { + 'key_cache': [tensor.tolist() for tensor in shard_past_kvs.key_cache], + 'value_cache': [tensor.tolist() for tensor in shard_past_kvs_kvs.value_cache] + } + else: + cache_dict = None - cache_dict = { - 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], - 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] - } + stopping_critera = self.stateful_sharded_model.stopping_critera + self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) + is_finished = self.unfinished_sequences.max() == 0 return ( - output_data, + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states, json.dumps(cache_dict), is_finished ) @@ -117,7 +117,7 @@ async def infer_tensor( input_data: np.ndarray, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: - if DEBUG >= 3: + if DEBUG >= 4: print("infer_tensor called") print(f"input_data: {input_data}") print(f"input_data.size: {input_data.size}") @@ -126,69 +126,34 @@ async def infer_tensor( await self.ensure_shard(shard) - current_kvs = None - - - if input_data.size == 1: - in_tensor = torch.tensor([[input_data.item()]]).to(self.device) - else: - in_tensor = torch.tensor(input_data).to(self.device) - - # in_tensor = torch.tensor(input_data).to(self.device) - - # in_tensor = self.stateful_sharded_model.embed_tokens(in_tensor) - - # convert inference_state or cache from json to DynamicCache - past_kv = DynamicCache() - if inference_state != None: - try: - cache_dict = json.loads(inference_state) - past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] - past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] - past_kv_length = past_kv[0][0].shape[2] - except json.JSONDecodeError: - print(f"ERROR DECODING INFERENCE STATE") - - if DEBUG >= 3: - # print(f"input_tensor: {in_tensor}") - print(f"layer_count: {self.shard.get_layer_count()}") - print(f"is_first_layer: {self.shard.is_first_layer()}") - print(f"is_last_layer: {self.shard.is_last_layer()}") - print(f"input_data.shape: {input_data.shape}") + hidden_states = torch.tensor(input_data) - print(f"in_tensor: {in_tensor}") - output_data, current_kvs = self.stateful_sharded_model.forward( - in_tensor, - None, - past_kv + shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( + hidden_states=hidden_states ) - is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] - - if DEBUG >= 3: - print(f"output_data: {output_data}\n") - print(f"output_data.size {output_data.size}\n") - print(f"finished: {is_finished}") - print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") - print(f"output_data[-1] {output_data[-1]}") - print("====================================================") - - if output_data.size == 1: - print(f"size 1 output_data.item() {output_data.item()}") - print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") + if shard_logits is not None: + input_ids = self.stateful_sharded_model.logits_sample(shard_logits) + + if shard_past_kvs is not None: + cache_dict = { + 'key_cache': [tensor.tolist() for tensor in shard_past_kvs.key_cache], + 'value_cache': [tensor.tolist() for tensor in shard_past_kvs_kvs.value_cache] + } + else: + cache_dict = None - - cache_dict = { - 'key_cache': [tensor.tolist() for tensor in 
current_kvs.key_cache], - 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] - } + stopping_critera = self.stateful_sharded_model.stopping_critera + self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) + is_finished = self.unfinished_sequences.max() == 0 return ( - output_data, + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states, json.dumps(cache_dict), is_finished ) - + + async def ensure_shard(self, shard: Optional[Shard]): """ Ensure the model shard is loaded and ready for inference. @@ -206,7 +171,7 @@ async def ensure_shard(self, shard: Optional[Shard]): # model_path = await self.shard_downloader.ensure_shard(shard) self.tokenizer = await resolve_tokenizer(shard.model_id) - self.stateful_sharded_model = ShardedHuggingFaceModel(shard) + self.stateful_sharded_model = ShardedHuggingFaceModel(shard, self.device, self.torch_dtype) self.shard = shard if DEBUG >= 4: diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 2805ae67c..be928aa3d 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -38,8 +38,8 @@ from transformers.models.qwen2.modeling_qwen2 import Qwen2Model -class ShardedHuggingFaceModel(InferenceEngine): - def __init__(self, shard: Shard, ): +class ShardedHuggingFaceModel: + def __init__(self, shard: Shard, device, dtype): # class vars self.shard = shard self.hidden_states = None @@ -50,15 +50,35 @@ def __init__(self, shard: Shard, ): self.past_key_values = None self.cache_position = None self.position_ids = None - self.causal_mask = None + self.causal_mask = None + + # setup logit processors + self.logits_processor = LogitsProcessorList([ + TopKLogitsWarper(35), + TemperatureLogitsWarper(0.6), + TopPLogitsWarper(0.8) + ]) + + # setup stopping critera for generation + self.stopping_critera = StoppingCriteriaList( + [ + MaxLengthCriteria(max_length=50), + MaxTimeCriteria(max_time=10.0), + ] + ) + + self.device = device + self.torch_dtype = dtype # setup pytorch and transformer llm try: - self.base_model = AutoModelForCausalLM.from_pretrained( + self.llm_model = AutoModelForCausalLM.from_pretrained( shard.model_id, torch_dtype=self.torch_dtype, device_map="auto" - ) + ) + + self.model = self.llm_model.model except Exception as err: print(f"error loading and splitting model: {err}") raise @@ -67,8 +87,6 @@ def __init__(self, shard: Shard, ): def forward( self, shard: Optional[Shard] = None, - model, - llm_model, input_ids: Optional[torch.tensor] = None, hidden_states: Optional[torch.tensor] = None, attention_mask: Optional[torch.tensor] = None, @@ -108,7 +126,7 @@ def forward( self.input_ids = input_ids # embed input_ids - self.inputs_embeds = model.embed_tokens(self.input_ids) + self.inputs_embeds = self.model.embed_tokens(self.input_ids) # cache if past_key_values and not isinstance(past_key_values, Cache): @@ -128,23 +146,23 @@ def forward( # casual mask and attention_mask self.attention_mask = attention_mask - self.causal_mask = model._update_causal_mask( + self.causal_mask = self.model._update_causal_mask( None, self.inputs_embeds, cache_position, past_key_values, - output_attentions + False # dont out attentions ) # embed positions, some models require and some dont - if isinstance(model, LlamaModel): - self.position_embeddings = model.rotary_emb( + if isinstance(self.model, LlamaModel): + self.position_embeddings = self.model.rotary_emb( self.inputs_embeds, position_ids ) # prepare inputs for decoder layers - 
model_inputs = llm_model.prepare_inputs_for_generation( + model_inputs = self.llm_model.prepare_inputs_for_generation( self.input_ids, past_key_values=past_key_values, attention_mask=self.attention_mask, @@ -175,27 +193,61 @@ def forward( self.next_decoder_cache = layer_outputs[1] - # handle last layer to get logits - if self.is_last: - self.hidden_states = model.norm(self.hidden_states) - + # handle last layer to get logits + if self.shard.is_last_layer(): + self.hidden_states = self.model.norm(self.hidden_states) if use_legacy_cache: self.past_key_values = self.next_decoder_cache.to_legacy_cache() else: self.past_key_values = self.next_decoder_cache # lm_head - logits = llm_model.lm_head(self.hidden_states).to(self.device) + logits = self.llm_model.lm_head(self.hidden_states).to(self.device) return ( None, None, logits ) - + print("199") return ( self.hidden_states, self.past_key_values, None ) + + def logits_sample( + self, + input_ids: torch.tensor, + logits: torch.tensor, + use_max: Optional[bool] = False + ) -> torch.tensor: + """ + Get a sample of the logits from end of model run + + Args: + logits: tensor + use_max: bool, if function should sample with argmax + + Returns: + input_ids: tensor + """ + + # get a single cloned logit + logits = logits[:, 1, :].clone().float() + + + next_token_scores = self.logits_processor(input_ids, logits) + + if not use_max: + probs = nn.functional.softmax(next_token_scores, dim=-1) + next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1) + else: + next_tokens = torch.argmax(next_token_scores, dim=-1) + + # get inputs_ids from token sample + input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) + + return input_ids + diff --git a/exo/inference/pytorch/tests/__init__.py b/exo/inference/pytorch/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/exo/inference/pytorch/test_inference_engine.py b/exo/inference/pytorch/tests/test_inference_engine.py similarity index 100% rename from exo/inference/pytorch/test_inference_engine.py rename to exo/inference/pytorch/tests/test_inference_engine.py diff --git a/exo/inference/pytorch/test_inference_loop.py b/exo/inference/pytorch/tests/test_inference_loop.py similarity index 77% rename from exo/inference/pytorch/test_inference_loop.py rename to exo/inference/pytorch/tests/test_inference_loop.py index a61b43427..b9cdd0053 100644 --- a/exo/inference/pytorch/test_inference_loop.py +++ b/exo/inference/pytorch/tests/test_inference_loop.py @@ -48,16 +48,16 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e if __name__ == '__main__': - # try: - # print(f"\n\n -------- TEST QWEN2 -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "Qwen/Qwen2-0.5B-Instruct", - # 24 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") + try: + print(f"\n\n -------- TEST Qwen/Qwen2-0.5B-Instruct -------- \n\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "Qwen/Qwen2-0.5B-Instruct", + 24 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! 
QWEN2 TEST FAILED \n{err}\n") # try: # print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") @@ -92,14 +92,14 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e # except Exception as err: # print(f"\n\n !!!!!!!!!!! Chickaboo/ChickaQ-Large TEST FAILED \n{err}\n") - try: - print(f"\n\n --------- TEST ambrosfitz/TinyLlama-1.1B-Chat-yawp -------\n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "ambrosfitz/TinyLlama-1.1B-Chat-yawp", - 22 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! ambrosfitz/TinyLlama-1.1B-Chat-yawp TEST FAILED \n{err}\n") + #try: + # print(f"\n\n --------- TEST ambrosfitz/TinyLlama-1.1B-Chat-yawp -------\n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "ambrosfitz/TinyLlama-1.1B-Chat-yawp", + # 22 + # )) + #except Exception as err: + # print(f"\n\n !!!!!!!!!!! ambrosfitz/TinyLlama-1.1B-Chat-yawp TEST FAILED \n{err}\n") diff --git a/exo/inference/pytorch/test_simple_model.py b/exo/inference/pytorch/tests/test_simple_model.py similarity index 100% rename from exo/inference/pytorch/test_simple_model.py rename to exo/inference/pytorch/tests/test_simple_model.py diff --git a/exo/inference/pytorch/test_split_model.py b/exo/inference/pytorch/tests/test_split_model.py similarity index 99% rename from exo/inference/pytorch/test_split_model.py rename to exo/inference/pytorch/tests/test_split_model.py index 42e1642ab..7830b53ee 100644 --- a/exo/inference/pytorch/test_split_model.py +++ b/exo/inference/pytorch/tests/test_split_model.py @@ -33,7 +33,6 @@ from exo.api.chatgpt_api import resolve_tokenizer from typing import Tuple, Optional, Union, List import re -from exo.inference.pytorch.utils import sample_logits, top_k_sampling TEMP = 0.6 TOP_K = 60 diff --git a/exo/inference/pytorch/test_weight_load.py b/exo/inference/pytorch/tests/test_weight_load.py similarity index 100% rename from exo/inference/pytorch/test_weight_load.py rename to exo/inference/pytorch/tests/test_weight_load.py diff --git a/exo/inference/pytorch/utils.py b/exo/inference/pytorch/tests/utils.py similarity index 100% rename from exo/inference/pytorch/utils.py rename to exo/inference/pytorch/tests/utils.py From 623468caee02c98395956fcfb87141e6cf705bbb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 15 Sep 2024 16:52:28 -0800 Subject: [PATCH 334/589] adding working tests, update to forward function to just use input_ids, update infer to pass logits and hidden_states --- exo/inference/pytorch/inference.py | 41 ++++- exo/inference/pytorch/model/hf.py | 144 ++++++++++-------- .../pytorch/tests/test_inference_engine.py | 56 ++++--- .../pytorch/tests/test_inference_loop.py | 28 ++-- 4 files changed, 161 insertions(+), 108 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 3bb7afd72..5ce6f2a23 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -2,6 +2,7 @@ import numpy as np import torch import json +import gc from typing import Optional, Tuple from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine @@ -75,12 +76,19 @@ async def infer_prompt( batch_size, seq_length = input_ids.shape[:2] + if inference_state is not None: + past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) + else: + 
past_kvs = None + + if DEBUG >= 4: print(f"input_ids: {input_ids}\n") shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( input_ids=input_ids, - attention_mask=input_attention_mask + attention_mask=input_attention_mask, + past_key_values=past_kvs ) if DEBUG >= 4: @@ -105,7 +113,7 @@ async def infer_prompt( is_finished = self.unfinished_sequences.max() == 0 return ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states, + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), json.dumps(cache_dict), is_finished ) @@ -126,14 +134,29 @@ async def infer_tensor( await self.ensure_shard(shard) - hidden_states = torch.tensor(input_data) + if input_data.size == 1: + hidden_states = torch.tensor(input_data).to(self.device) + hidden_states = hidden_states.unsqueeze(0) + else: + hidden_states = torch.tensor(input_data).long().to(self.device) + + if inference_state is not None: + past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) + else: + past_kvs = None + + if DEBUG >= 4: + print(f"hidden_states: {hidden_states}") + print(f"inference_state: {inference_state}") shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( - hidden_states=hidden_states + input_ids=hidden_states, + past_key_values=past_kvs, + infer_tensor=True ) if shard_logits is not None: - input_ids = self.stateful_sharded_model.logits_sample(shard_logits) + input_ids = self.stateful_sharded_model.logits_sample(hidden_states, shard_logits) if shard_past_kvs is not None: cache_dict = { @@ -148,7 +171,7 @@ async def infer_tensor( is_finished = self.unfinished_sequences.max() == 0 return ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states, + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), json.dumps(cache_dict), is_finished ) @@ -170,6 +193,12 @@ async def ensure_shard(self, shard: Optional[Shard]): # need to build in shard downloader # model_path = await self.shard_downloader.ensure_shard(shard) + if self.stateful_sharded_model: + print("Deleting model") + del self.stateful_sharded_model + # gc.collect() + # torch.cuda.empty_cache() + self.tokenizer = await resolve_tokenizer(shard.model_id) self.stateful_sharded_model = ShardedHuggingFaceModel(shard, self.device, self.torch_dtype) self.shard = shard diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index be928aa3d..a59eb1a7a 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,7 +1,6 @@ import torch import torch.nn as nn import numpy as np -import gc from typing import Tuple, Optional, Union, List from exo.inference.shard import Shard @@ -75,7 +74,8 @@ def __init__(self, shard: Shard, device, dtype): self.llm_model = AutoModelForCausalLM.from_pretrained( shard.model_id, torch_dtype=self.torch_dtype, - device_map="auto" + device_map="auto", + offload_buffers=True ) self.model = self.llm_model.model @@ -88,10 +88,10 @@ def forward( self, shard: Optional[Shard] = None, input_ids: Optional[torch.tensor] = None, - hidden_states: Optional[torch.tensor] = None, attention_mask: Optional[torch.tensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, - use_legacy_cache: Optional[bool] = False + use_legacy_cache: Optional[bool] = False, + infer_tensor: Optional[bool] = False ) -> Tuple[Optional[torch.tensor], Optional[Union[Cache, 
List[torch.FloatTensor]]], Optional[torch.tensor]]: """ @@ -116,70 +116,79 @@ def forward( """ - if input_ids is not None and hidden_states is not None: - raise ValueError - - if hidden_states is not None: - self.hidden_states = hidden_states - - if input_ids is not None: - self.input_ids = input_ids - - # embed input_ids - self.inputs_embeds = self.model.embed_tokens(self.input_ids) - - # cache - if past_key_values and not isinstance(past_key_values, Cache): - print("Using legacy cache") - use_legacy_cache = True - past_key_values = DynamicCache.from_legacy_cache(past_key_values) - - past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + self.inputs_embeds.shape[1], - device=self.inputs_embeds.device - ) + self.input_ids = input_ids + + # embed input_ids + self.inputs_embeds = self.model.embed_tokens(self.input_ids) + + #if DEBUG >= 4: + # print("forward called") + # print(f"input_ids: {self.input_ids}") + # print(f"inputs_embeds: {self.inputs_embeds}") + + # cache + if past_key_values and not isinstance(past_key_values, Cache): + print("Using legacy cache") + use_legacy_cache = True + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + + past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + self.inputs_embeds.shape[1], + device=self.inputs_embeds.device + ) - # position id - position_ids = cache_position.unsqueeze(0) + # position id + position_ids = cache_position.unsqueeze(0) + + # casual mask and attention_mask + self.attention_mask = attention_mask + self.causal_mask = self.model._update_causal_mask( + None, + self.inputs_embeds, + cache_position, + past_key_values, + False # dont out attentions + ) - # casual mask and attention_mask - self.attention_mask = attention_mask - self.causal_mask = self.model._update_causal_mask( - None, + # embed positions, some models require and some dont + if isinstance(self.model, LlamaModel): + self.position_embeddings = self.model.rotary_emb( self.inputs_embeds, - cache_position, - past_key_values, - False # dont out attentions - ) - - # embed positions, some models require and some dont - if isinstance(self.model, LlamaModel): - self.position_embeddings = self.model.rotary_emb( - self.inputs_embeds, - position_ids - ) - - # prepare inputs for decoder layers - model_inputs = self.llm_model.prepare_inputs_for_generation( - self.input_ids, - past_key_values=past_key_values, - attention_mask=self.attention_mask, - inputs_embeds=self.inputs_embeds, - position_ids=position_ids, - cache_position=cache_position + position_ids ) + + # prepare inputs for decoder layers + model_inputs = self.llm_model.prepare_inputs_for_generation( + self.input_ids, + past_key_values=past_key_values, + attention_mask=self.attention_mask, + inputs_embeds=self.inputs_embeds, + position_ids=position_ids, + cache_position=cache_position + ) - self.hidden_states = self.inputs_embeds - self.position_ids = model_inputs["position_ids"] - self.cache_position = model_inputs["cache_position"] - self.past_key_values = model_inputs["past_key_values"] + self.hidden_states = self.inputs_embeds if not infer_tensor else self.input_ids + self.position_ids = model_inputs["position_ids"] + self.cache_position = model_inputs["cache_position"] + self.past_key_values = model_inputs["past_key_values"] # run through decoder layers - layer_amt = 
range(self.shard.start_layer, self.shard.end_layer + 1) + layer_amt = range(self.shard.start_layer, self.shard.end_layer + 1) + + if DEBUG >= 4: + print(f"hidden_states: {self.hidden_states}") + print(f"model_inputs: {model_inputs}") + print(f"layer_amt: {layer_amt}") + for i in layer_amt: decoder_layer = self.model.layers[i] + if DEBUG >= 5: + print("decoder_layer before") + print(f"decoder_layer: {decoder_layer}") + print(f"hidden_states: {self.hidden_states}") + layer_outputs = decoder_layer( self.hidden_states, attention_mask=self.causal_mask, @@ -192,8 +201,14 @@ def forward( self.hidden_states = layer_outputs[0] self.next_decoder_cache = layer_outputs[1] + if DEBUG >= 5: + print("decoder_layer after") + print(f"hidden_states: {self.hidden_states}") + print(f"next_decoder_cache: {self.next_decoder_cache}") + # handle last layer to get logits + # shard is last layer says true at the start and not detecting last layer correctly if self.shard.is_last_layer(): self.hidden_states = self.model.norm(self.hidden_states) if use_legacy_cache: @@ -209,6 +224,7 @@ def forward( None, logits ) + print("199") return ( self.hidden_states, @@ -223,7 +239,7 @@ def logits_sample( use_max: Optional[bool] = False ) -> torch.tensor: """ - Get a sample of the logits from end of model run + Get a sample of the logits from end of model run for next token Args: logits: tensor @@ -234,7 +250,7 @@ def logits_sample( """ # get a single cloned logit - logits = logits[:, 1, :].clone().float() + logits = logits[:, -1, :].clone().float() next_token_scores = self.logits_processor(input_ids, logits) @@ -245,9 +261,11 @@ def logits_sample( else: next_tokens = torch.argmax(next_token_scores, dim=-1) + print(f"next_tokens: {next_tokens[:, None]}") + # get inputs_ids from token sample - input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) + # input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) - return input_ids + return next_tokens[:, None] diff --git a/exo/inference/pytorch/tests/test_inference_engine.py b/exo/inference/pytorch/tests/test_inference_engine.py index 9b8a19ef6..7e64c137a 100644 --- a/exo/inference/pytorch/tests/test_inference_engine.py +++ b/exo/inference/pytorch/tests/test_inference_engine.py @@ -8,8 +8,14 @@ from exo.helpers import DEBUG import os import numpy as np +import time + +async def test_inference_engine( + inference_engine_1: InferenceEngine, + inference_engine_2: InferenceEngine, + model_id: str, + n_layers: int): -async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str, n_layers: int): # prompt = "Why is the sky blue?" prompt = "In a single word only, what is the last name of the current president of the USA?" 
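# A small standalone sketch of the DynamicCache / legacy-cache handling that
# forward() above relies on. The key/value tensors are dummies with an arbitrary
# (batch, heads, seq_len, head_dim) shape; only the API calls matter here.
import torch
from transformers import DynamicCache

legacy = tuple(
    (torch.randn(1, 2, 5, 8), torch.randn(1, 2, 5, 8))   # one (key, value) pair per layer
    for _ in range(2)
)
past_key_values = DynamicCache.from_legacy_cache(legacy)

past_seen_tokens = past_key_values.get_seq_length()       # 5 tokens already cached
new_embeds_len = 1                                         # e.g. one new input embedding
cache_position = torch.arange(past_seen_tokens, past_seen_tokens + new_embeds_len)
position_ids = cache_position.unsqueeze(0)

legacy_again = past_key_values.to_legacy_cache()           # back to tuple-of-tuples form
print(past_seen_tokens, position_ids.tolist(), len(legacy_again))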
@@ -30,6 +36,8 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e print(resp_full) print("\n------------resp_full---------------\n") + time.sleep(5) + next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( "A", shard=shard, @@ -41,8 +49,10 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e print(next_resp_full) print("\n------------next_resp_full---------------\n") + time.sleep(5) + pp = int(n_layers/2) - + resp_shard = Shard( model_id=model_id, start_layer=0, @@ -67,6 +77,8 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e print(resp1) print("\n------------resp1---------------\n") + time.sleep(5) + resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( "B", @@ -105,16 +117,16 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e assert np.array_equal(next_resp_full, resp4) if __name__ == '__main__': - # try: - # print(f"\n\n -------- TEST QWEN2 -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "Qwen/Qwen2-0.5B-Instruct", - # 24 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") + try: + print(f"\n\n -------- TEST QWEN2 -------- \n\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "Qwen/Qwen2-0.5B-Instruct", + 24 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") # try: # print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") @@ -149,14 +161,14 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e # except Exception as err: # print(f"\n\n !!!!!!!!!!! Chickaboo/ChickaQ-Large TEST FAILED \n{err}\n") - try: - print(f"\n\n --------- TEST TinyLlama/TinyLlama_v1.1 -------\n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "TinyLlama/TinyLlama_v1.1", - 22 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! TinyLlama/TinyLlama_v1.1 TEST FAILED \n{err}\n") + #try: + # print(f"\n\n --------- TEST TinyLlama/TinyLlama_v1.1 -------\n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "TinyLlama/TinyLlama_v1.1", + # 22 + # )) + #except Exception as err: + # print(f"\n\n !!!!!!!!!!! 
TinyLlama/TinyLlama_v1.1 TEST FAILED \n{err}\n") diff --git a/exo/inference/pytorch/tests/test_inference_loop.py b/exo/inference/pytorch/tests/test_inference_loop.py index b9cdd0053..d9b038d81 100644 --- a/exo/inference/pytorch/tests/test_inference_loop.py +++ b/exo/inference/pytorch/tests/test_inference_loop.py @@ -21,8 +21,8 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e ) resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( - "A", - shard=shard, + "A", + shard=shard, prompt=prompt ) @@ -30,22 +30,16 @@ async def test_inference_engine(inference_engine_1: InferenceEngine, inference_e print(resp_full) print("\n------------resp_full---------------\n") - next_resp_full = resp_full - is_finished = False - while not is_finished: - next_resp_full, _next_inference_state_full, is_finished = await inference_engine_1.infer_tensor( - "A", - shard=shard, - input_data=next_resp_full, - inference_state=inference_state_full, - ) - - print("\n------------next_resp_full---------------\n") - print(next_resp_full) - print("\n------------next_resp_full---------------\n") - + next_resp_full, _next_inference_state_full, is_finished = await inference_engine_1.infer_tensor( + "A", + shard=shard, + input_data=resp_full, + inference_state=inference_state_full, + ) - + print("\n------------next_resp_full---------------\n") + print(next_resp_full) + print("\n------------next_resp_full---------------\n") if __name__ == '__main__': try: From 19b322dee8d94ff7969a182be65245f59f4e1eb8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 13:10:58 -0800 Subject: [PATCH 335/589] cleaning up code and tests, debugging and adding in cleaned up logging, added model value options --- exo/inference/pytorch/.gitignore | 1 + exo/inference/pytorch/inference.py | 33 ++- exo/inference/pytorch/model/hf.py | 48 ++-- .../pytorch/tests/test_inference_loop.py | 99 --------- .../pytorch/tests/test_split_model.py | 3 +- .../pytorch/tests/test_weight_load.py | 206 ------------------ 6 files changed, 52 insertions(+), 338 deletions(-) delete mode 100644 exo/inference/pytorch/tests/test_inference_loop.py delete mode 100644 exo/inference/pytorch/tests/test_weight_load.py diff --git a/exo/inference/pytorch/.gitignore b/exo/inference/pytorch/.gitignore index 8fce60300..6d76c24de 100644 --- a/exo/inference/pytorch/.gitignore +++ b/exo/inference/pytorch/.gitignore @@ -1 +1,2 @@ data/ +model/archive/ diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 5ce6f2a23..3e6dc2666 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -14,13 +14,15 @@ from exo.download.shard_download import ShardDownloader # model value options -TOP_K = 35 -TEMP = 0.6 -TOP_P = 0.8 +TOP_K = 25 +TEMP = 0.7 +TOP_P = 0.9 +MAX_LENGTH = 125 +MAX_TIME = 10.0 class PyTorchDynamicShardInferenceEngine(InferenceEngine): """ - PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. + PyTorch Dynamic Shard Inference Engine for performing model inference with sharded Pytorch/HF based models. 
""" def __init__(self, shard_downloader: ShardDownloader): @@ -190,17 +192,24 @@ async def ensure_shard(self, shard: Optional[Shard]): if DEBUG >= 4: print(f"Loading new shard: {shard}") - # need to build in shard downloader + # -- TO DO -- + # Build in shard downloader but requires pulling + # apart how TrainedModel loads weight in its __init__ + # function in the transformer library # model_path = await self.shard_downloader.ensure_shard(shard) - if self.stateful_sharded_model: - print("Deleting model") - del self.stateful_sharded_model - # gc.collect() - # torch.cuda.empty_cache() - self.tokenizer = await resolve_tokenizer(shard.model_id) - self.stateful_sharded_model = ShardedHuggingFaceModel(shard, self.device, self.torch_dtype) + self.stateful_sharded_model = ShardedHuggingFaceModel( + shard=shard, + device=self.device, + dtype=self.torch_dtype, + top_k=TOP_K, + temp=TEMP, + top_p=TOP_P, + max_length=MAX_LENGTH, + max_time=MAX_TIME + ) + self.shard = shard if DEBUG >= 4: diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index a59eb1a7a..93fc7ad31 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -38,7 +38,17 @@ class ShardedHuggingFaceModel: - def __init__(self, shard: Shard, device, dtype): + def __init__( + self, + shard: Shard, + device, + dtype, + top_k: int = 25, + temp: float = 0.7, + top_p: float = 0.9, + max_length: int = 50, + max_time: float = 10.0 + ): # class vars self.shard = shard self.hidden_states = None @@ -53,16 +63,16 @@ def __init__(self, shard: Shard, device, dtype): # setup logit processors self.logits_processor = LogitsProcessorList([ - TopKLogitsWarper(35), - TemperatureLogitsWarper(0.6), - TopPLogitsWarper(0.8) + TopKLogitsWarper(top_k), + TemperatureLogitsWarper(temp), + TopPLogitsWarper(top_p) ]) # setup stopping critera for generation self.stopping_critera = StoppingCriteriaList( [ - MaxLengthCriteria(max_length=50), - MaxTimeCriteria(max_time=10.0), + MaxLengthCriteria(max_length=max_length), + MaxTimeCriteria(max_time=max_time), ] ) @@ -103,10 +113,10 @@ def forward( model: base llm model tramsformers class llm_model: llm chat model class input_ids: tensor optional - hidden_states: tensor optional attention_mask: tensor optional past_key_values: Cache or list[tensor] optional - use_legacy_cache: bool optional + use_legacy_cache: bool optional + infer_tensor: bool optional, lets forward know to handle tensors Returns: Tuple of @@ -120,15 +130,9 @@ def forward( # embed input_ids self.inputs_embeds = self.model.embed_tokens(self.input_ids) - - #if DEBUG >= 4: - # print("forward called") - # print(f"input_ids: {self.input_ids}") - # print(f"inputs_embeds: {self.inputs_embeds}") - + # cache if past_key_values and not isinstance(past_key_values, Cache): - print("Using legacy cache") use_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) @@ -219,13 +223,19 @@ def forward( # lm_head logits = self.llm_model.lm_head(self.hidden_states).to(self.device) + if DEBUG >= 4: + print(f"logits: {logits}") + return ( None, None, logits ) - print("199") + if DEBUG >= 4: + print(f"hidden_states: {self.hidden_states}") + print(f"past_key_values: {self.past_key_values}") + return ( self.hidden_states, self.past_key_values, @@ -261,10 +271,8 @@ def logits_sample( else: next_tokens = torch.argmax(next_token_scores, dim=-1) - print(f"next_tokens: {next_tokens[:, None]}") - - # get inputs_ids from token sample - # input_ids = torch.cat([input_ids, next_tokens[:, None]], 
dim=-1) + if DEBUG >= 4: + print(f"next_tokens: {next_tokens[:, None]}") return next_tokens[:, None] diff --git a/exo/inference/pytorch/tests/test_inference_loop.py b/exo/inference/pytorch/tests/test_inference_loop.py deleted file mode 100644 index d9b038d81..000000000 --- a/exo/inference/pytorch/tests/test_inference_loop.py +++ /dev/null @@ -1,99 +0,0 @@ - -import asyncio -from exo.inference.shard import Shard -from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine -from exo.download.hf.hf_shard_download import HFShardDownloader -from exo.inference.inference_engine import InferenceEngine -from exo.inference.shard import Shard -from exo.helpers import DEBUG -import os -import numpy as np - -async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str, n_layers: int): - # prompt = "Why is the sky blue?" - prompt = "In a single word only, what is the last name of the current president of the USA?" - - shard = Shard( - model_id=model_id, - start_layer=0, - end_layer=n_layers-1, - n_layers=n_layers - ) - - resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( - "A", - shard=shard, - prompt=prompt - ) - - print("\n------------resp_full---------------\n") - print(resp_full) - print("\n------------resp_full---------------\n") - - next_resp_full, _next_inference_state_full, is_finished = await inference_engine_1.infer_tensor( - "A", - shard=shard, - input_data=resp_full, - inference_state=inference_state_full, - ) - - print("\n------------next_resp_full---------------\n") - print(next_resp_full) - print("\n------------next_resp_full---------------\n") - -if __name__ == '__main__': - try: - print(f"\n\n -------- TEST Qwen/Qwen2-0.5B-Instruct -------- \n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "Qwen/Qwen2-0.5B-Instruct", - 24 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") - - # try: - # print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "andrijdavid/Llama3-1B-Base", - # 3 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") - - # try: - # print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "meta-llama/Meta-Llama-3.1-8B", - # 32 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") - - # try: - # print(f"\n\n ------- TEST Chickaboo/ChickaQ-Large -----\n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "Chickaboo/ChickaQ-Large", - # 24 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! 
Chickaboo/ChickaQ-Large TEST FAILED \n{err}\n") - - #try: - # print(f"\n\n --------- TEST ambrosfitz/TinyLlama-1.1B-Chat-yawp -------\n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "ambrosfitz/TinyLlama-1.1B-Chat-yawp", - # 22 - # )) - #except Exception as err: - # print(f"\n\n !!!!!!!!!!! ambrosfitz/TinyLlama-1.1B-Chat-yawp TEST FAILED \n{err}\n") - diff --git a/exo/inference/pytorch/tests/test_split_model.py b/exo/inference/pytorch/tests/test_split_model.py index 7830b53ee..827bdec2e 100644 --- a/exo/inference/pytorch/tests/test_split_model.py +++ b/exo/inference/pytorch/tests/test_split_model.py @@ -327,7 +327,8 @@ async def model_half_split_test(prompt: str, model_id: str, layers: int): if __name__ == "__main__": - prompt = "In a single word only, what is the last name of the current president of the USA?" + #prompt = "In a single word only, what is the last name of the current president of the USA?" + prompt = "What color is the sky? Explain why" #prompt = "In a single word only, what is the color of an apple?" #print("\n-------- Test TinyLlama/TinyLlama_v1.1 ----------\n") diff --git a/exo/inference/pytorch/tests/test_weight_load.py b/exo/inference/pytorch/tests/test_weight_load.py deleted file mode 100644 index 7eb8142f9..000000000 --- a/exo/inference/pytorch/tests/test_weight_load.py +++ /dev/null @@ -1,206 +0,0 @@ -import torch -import torch.nn as nn -import asyncio -import gc -import json -from transformers import AutoConfig, AutoModel -from safetensors import safe_open -from typing import Tuple, Optional -import re -from exo.inference.pytorch.utils import sample_logits, top_k_sampling -from exo.api.chatgpt_api import resolve_tokenizer - -TEMP = 0.6 -TOP_K = 60 - -class OnionHuggingFaceLM(): - def __init__(self, layers, safetensor_index_file, safetensor_directory, is_last=False): - self.layers = layers - self.is_last = is_last - self.safetensor_index_file = safetensor_index_file - self.safetensor_directory = safetensor_directory - - # Load the safetensor index JSON - with open(safetensor_index_file, "r") as f: - self.index_data = json.load(f) - self.weight_map = self.index_data['weight_map'] - self.safetensors_metadata = self.index_data['safetensors_metadata'] - - def load_layer_weights(self, model, layer_index): - layer_tensors = {} - for param_name, file_name in self.weight_map.items(): - if param_name.startswith(f"model.layers.{layer_index}"): - file_path = f"{self.safetensor_directory}/{file_name}" - print(f"loading safetensor\n{file_path}\nfor layer\n{layer_index}") - offsets = self.safetensors_metadata[file_name]['offsets'][param_name] - dtype = self.safetensors_metadata[file_name]['dtype'] - shape = self.safetensors_metadata[file_name]['shape'] - - with safe_open(file_path, framework="pt", device="cuda") as f: - tensor = f.get_tensor_slice(offsets[0], offsets[1]) - tensor = tensor.view(shape) # Reshape to the correct shape - - layer_tensors[param_name] = tensor - - # Assign these tensors to the model's layer - for param_name, tensor in layer_tensors.items(): - param_pointer = model - param_parts = param_name.split('.') - for attr in param_parts[:-1]: - if attr.isdigit(): - attr = int(attr) - param_pointer = getattr(param_pointer, attr) - setattr(param_pointer, param_parts[-1], tensor) - - def forward( - self, - model, - input_ids: torch.tensor=None, - hidden_states: torch.tensor=None, - attention_mask: torch.tensor=None, - **kwargs - ) -> 
Tuple[Optional[torch.tensor], Optional[torch.tensor]]: - - base_model = model.model - - if input_ids is not None and hidden_states is not None: - print("You must either pass a hidden_state or input_ids but not both") - raise ValueError - - if input_ids is not None: - hidden_states = base_model.embed_tokens(input_ids) - position_ids = torch.arange( - 0, - input_ids.size(1), - device=input_ids.device - ).unsqueeze(0) - - if hidden_states is not None: - position_ids = torch.arange( - 0, - hidden_states.size(1), - device=hidden_states.device - ).unsqueeze(0) - - for idx, layer in enumerate(self.layers): - print(f"Loading weights for layer {idx}") - self.load_layer_weights(model, idx) # Load weights for the current layer - print(f"Processing hidden state from layer {idx}\n") - hidden_states = layer( - hidden_states, - position_ids=position_ids - )[0] - - if self.is_last: - norm_states = base_model.norm(hidden_states).to("cuda") - logits = model.lm_head(norm_states).to("cuda") - - return (None, logits) - - return (hidden_states, None) - -async def model_half_split_test( - prompt: str, - model_id: str, - layers: int, - safetensor_index_file: str, - safetensor_directory: str): - - half_layers = int(layers / 2) - - print("loading tokenizer") - tokenizer = await resolve_tokenizer(model_id) - max_length = 512 - - print("loading config and model") - config = AutoConfig.from_pretrained(model_id, local_files_only=True) - model = AutoModel.from_config(config).to("cuda") - - print(model.hf_device_map) - - shard_layers = nn.ModuleList(model.model.layers[:half_layers]) - sharded_model = OnionHuggingFaceLM( - layers=shard_layers, - safetensor_index_file=safetensor_index_file, - safetensor_directory=safetensor_directory - ) - - print(model) - - messages = [{"role": "user", "content": prompt}] - txt = tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) - - print(f"Generating from chat template\n{txt}") - - inputs = tokenizer([txt], return_tensors="pt") - input_ids = inputs.input_ids.to("cuda") - input_attention_mask = inputs.attention_mask.to("cuda") - - shard_hidden_states, shard_logits = sharded_model.forward( - model=model, - input_ids=input_ids - ) - - print(f"shard_hidden_states\n{shard_hidden_states}") - print(f"shard_logits\n{shard_logits}") - - print("Using first half hidden state for last half of model") - shard_layers = nn.ModuleList(model.model.layers[half_layers:]).to("cuda") - sharded_model.layers = shard_layers - sharded_model.is_last = True - - if shard_hidden_states is not None: - shard_hidden_states, shard_logits = sharded_model.forward( - model=model, - hidden_states=shard_hidden_states - ) - - print(f"shard_hidden_states\n{shard_hidden_states}") - print(f"shard_logits\n{shard_logits}") - else: - print("Sharded hidden states not found, error") - raise ValueError - - print("generate from logits") - if shard_logits is not None: - generated_ids = sample_logits(shard_logits, TEMP, 0.95, TOP_K) - print("generated_ids") - print(generated_ids) - - generated_text = tokenizer.batch_decode( - generated_ids, - skip_special_tokens=True, - clean_up_tokenization_spaces=False - )[0] - - print("Generated text:") - print(generated_text) - else: - print("Sharded logits missing from last layer run, error") - raise ValueError - - del model - gc.collect() - torch.cuda.empty_cache() - -if __name__ == "__main__": - prompt = "In a single word only, what is the last name of the current president of the USA?" 
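# A condensed sketch of the half-split flow that model_half_split_test above
# drives (assuming the OnionHuggingFaceLM.forward() defined earlier and a CUDA
# device, as in this test):
#
#   hidden, _ = sharded_model.forward(model=model, input_ids=input_ids)                 # first half -> hidden states
#   sharded_model.layers = nn.ModuleList(model.model.layers[half_layers:]).to("cuda")   # swap in second half
#   sharded_model.is_last = True
#   _, logits = sharded_model.forward(model=model, hidden_states=hidden)                # second half -> logits
#   generated_ids = sample_logits(logits, TEMP, 0.95, TOP_K)                            # sample next tokens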
- - print("\n-------- Test Qwen/Qwen2-7B-Instruct ----------\n") - model_id = "Qwen/Qwen2-7B-Instruct" - model_layers = 22 - - asyncio.run( - model_half_split_test( - prompt=prompt, - model_id=model_id, - layers=model_layers, - safetensor_index_file="./data/qwen2_7B_Instruct/model.safetensors.index.json", - safetensor_directory="./data/qwen2_7B_Instruct/" - ) - ) - From cc2c14cf87b83c3cde2a3c8a166fa5126bd54e92 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 15:26:26 -0800 Subject: [PATCH 336/589] getting infer and stop token issues --- exo/inference/pytorch/inference.py | 7 +++---- exo/inference/pytorch/model/hf.py | 15 +++++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 3e6dc2666..d14132e07 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -18,7 +18,7 @@ TEMP = 0.7 TOP_P = 0.9 MAX_LENGTH = 125 -MAX_TIME = 10.0 +MAX_TIME = 60.0 class PyTorchDynamicShardInferenceEngine(InferenceEngine): """ @@ -99,8 +99,7 @@ async def infer_prompt( print(f"\nshard_logits: {shard_logits}") if shard_logits is not None: - input_ids = self.stateful_sharded_model.logits_sample(input_ids, shard_logits) - print(input_ids) + input_ids = self.stateful_sharded_model.logits_sample(shard_logits) if shard_past_kvs is not None: cache_dict = { @@ -158,7 +157,7 @@ async def infer_tensor( ) if shard_logits is not None: - input_ids = self.stateful_sharded_model.logits_sample(hidden_states, shard_logits) + input_ids = self.stateful_sharded_model.logits_sample(shard_logits) if shard_past_kvs is not None: cache_dict = { diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 93fc7ad31..bf40f919b 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -71,7 +71,7 @@ def __init__( # setup stopping critera for generation self.stopping_critera = StoppingCriteriaList( [ - MaxLengthCriteria(max_length=max_length), + #MaxLengthCriteria(max_length=max_length), MaxTimeCriteria(max_time=max_time), ] ) @@ -173,7 +173,7 @@ def forward( cache_position=cache_position ) - self.hidden_states = self.inputs_embeds if not infer_tensor else self.input_ids + self.hidden_states = self.inputs_embeds self.position_ids = model_inputs["position_ids"] self.cache_position = model_inputs["cache_position"] self.past_key_values = model_inputs["past_key_values"] @@ -244,7 +244,6 @@ def forward( def logits_sample( self, - input_ids: torch.tensor, logits: torch.tensor, use_max: Optional[bool] = False ) -> torch.tensor: @@ -263,17 +262,21 @@ def logits_sample( logits = logits[:, -1, :].clone().float() - next_token_scores = self.logits_processor(input_ids, logits) + next_token_scores = self.logits_processor(self.input_ids, logits) if not use_max: probs = nn.functional.softmax(next_token_scores, dim=-1) - next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1) + next_tokens = torch.multinomial(probs, num_samples=1) else: next_tokens = torch.argmax(next_token_scores, dim=-1) if DEBUG >= 4: + print(f"input_ids: {self.input_ids}") print(f"next_tokens: {next_tokens[:, None]}") - return next_tokens[:, None] + input_ids = torch.cat([self.input_ids, next_tokens[:, None].squeeze(-1)], dim=-1) + + return input_ids + #return next_tokens[:, None].squeeze(-1) From 583629c0c3349b9fa6daebd5b9ceb7319290e387 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 16:35:29 -0800 Subject: [PATCH 337/589] add tracking of next token 
and other logits into the full input_ids so it will stop, context was also dropping due to not having all the logits --- exo/api/chatgpt_api.py | 9 +++++++- exo/inference/pytorch/inference.py | 35 ++++++++++++++++++------------ exo/inference/pytorch/model/hf.py | 14 +++++------- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py index 2619d1635..320377fbf 100644 --- a/exo/api/chatgpt_api.py +++ b/exo/api/chatgpt_api.py @@ -70,9 +70,16 @@ def generate_completion( } choice = completion["choices"][0] + print(f"\nchoice {choice}") if object_type.startswith("chat.completion"): key_name = "delta" if stream else "message" - choice[key_name] = {"role": "assistant", "content": tokenizer.decode(tokens)} + + token_decode = tokenizer.batch_decode( + tokens, + skip_special_tokens=True, + clean_up_tokenization_spaces=False + ) + choice[key_name] = {"role": "assistant", "content": token_decode} elif object_type == "text_completion": choice["text"] = tokenizer.decode(tokens) else: diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index d14132e07..f98ac4d03 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -14,8 +14,8 @@ from exo.download.shard_download import ShardDownloader # model value options -TOP_K = 25 -TEMP = 0.7 +TOP_K = 20 +TEMP = 0.6 TOP_P = 0.9 MAX_LENGTH = 125 MAX_TIME = 60.0 @@ -37,6 +37,11 @@ def __init__(self, shard_downloader: ShardDownloader): self.stateful_sharded_model = None self.tokenizer = None + # the whole history with new logits need to + # be passed to the model to reach the end token + # even with caching + self.past_input_ids = None + # setup cuda device if torch.cuda.is_available(): self.device = torch.device("cuda") @@ -99,8 +104,10 @@ async def infer_prompt( print(f"\nshard_logits: {shard_logits}") if shard_logits is not None: - input_ids = self.stateful_sharded_model.logits_sample(shard_logits) - + next_token = self.stateful_sharded_model.logits_sample(shard_logits) + self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) + input_ids = next_token + if shard_past_kvs is not None: cache_dict = { 'key_cache': [tensor.tolist() for tensor in shard_past_kvs.key_cache], @@ -111,7 +118,10 @@ async def infer_prompt( stopping_critera = self.stateful_sharded_model.stopping_critera self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) - is_finished = self.unfinished_sequences.max() == 0 + is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id + + if is_finished: + self.past_input_ids = None return ( input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), @@ -134,12 +144,9 @@ async def infer_tensor( print(f"shard: {self.shard}") await self.ensure_shard(shard) - - if input_data.size == 1: - hidden_states = torch.tensor(input_data).to(self.device) - hidden_states = hidden_states.unsqueeze(0) - else: - hidden_states = torch.tensor(input_data).long().to(self.device) + + input_ids = torch.tensor(input_data).long().to(self.device) + self.past_input_ids = torch.cat([self.past_input_ids, input_ids], dim=-1) if inference_state is not None: past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) @@ -147,11 +154,11 @@ async def infer_tensor( past_kvs = None if DEBUG >= 4: - print(f"hidden_states: {hidden_states}") + print(f"input_ids: {input_ids}") print(f"inference_state: 
{inference_state}") shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( - input_ids=hidden_states, + input_ids=self.past_input_ids, past_key_values=past_kvs, infer_tensor=True ) @@ -169,7 +176,7 @@ async def infer_tensor( stopping_critera = self.stateful_sharded_model.stopping_critera self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) - is_finished = self.unfinished_sequences.max() == 0 + is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id return ( input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index bf40f919b..066d643c1 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -255,28 +255,24 @@ def logits_sample( use_max: bool, if function should sample with argmax Returns: - input_ids: tensor + next_token: tensor """ # get a single cloned logit logits = logits[:, -1, :].clone().float() - next_token_scores = self.logits_processor(self.input_ids, logits) if not use_max: probs = nn.functional.softmax(next_token_scores, dim=-1) - next_tokens = torch.multinomial(probs, num_samples=1) + next_token = torch.multinomial(probs, num_samples=1) else: - next_tokens = torch.argmax(next_token_scores, dim=-1) + next_token = torch.argmax(next_token_scores, dim=-1) if DEBUG >= 4: print(f"input_ids: {self.input_ids}") - print(f"next_tokens: {next_tokens[:, None]}") + print(f"next_token: {next_token}") - input_ids = torch.cat([self.input_ids, next_tokens[:, None].squeeze(-1)], dim=-1) - - return input_ids - #return next_tokens[:, None].squeeze(-1) + return next_token[:, None].squeeze(-1) From 7ec5bb8409db41f0a411538e78ff5a0226b4bc33 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 17:19:03 -0800 Subject: [PATCH 338/589] grpc testing --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index f98ac4d03..5d3fa5064 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -124,7 +124,7 @@ async def infer_prompt( self.past_input_ids = None return ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), + input_ids.numpy(force=True), json.dumps(cache_dict), is_finished ) @@ -179,7 +179,7 @@ async def infer_tensor( is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id return ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), + input_ids.numpy(force=True), json.dumps(cache_dict), is_finished ) From 5903e6342d241fdfa0fd9853873038d176fb0ad3 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 17:37:15 -0800 Subject: [PATCH 339/589] grpc testing --- exo/inference/pytorch/inference.py | 31 +++++++++++++++++++------ exo/inference/pytorch/model/hf.py | 3 +-- exo/networking/grpc/grpc_peer_handle.py | 3 +++ 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 5d3fa5064..8a9f32a7b 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -123,12 +123,17 @@ async def infer_prompt( if is_finished: self.past_input_ids = None - return ( - input_ids.numpy(force=True), + return_values 
= ( + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), json.dumps(cache_dict), is_finished ) + if DEBUG >= 4: + print(f"return_values: {return_values}") + + return return_values + async def infer_tensor( self, request_id: str, @@ -146,7 +151,11 @@ async def infer_tensor( await self.ensure_shard(shard) input_ids = torch.tensor(input_data).long().to(self.device) - self.past_input_ids = torch.cat([self.past_input_ids, input_ids], dim=-1) + + if self.past_input_ids is not None: + self.past_input_ids = torch.cat([self.past_input_ids, input_ids], dim=-1) + else: + self.past_input_ids = input_ids if inference_state is not None: past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) @@ -159,8 +168,7 @@ async def infer_tensor( shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( input_ids=self.past_input_ids, - past_key_values=past_kvs, - infer_tensor=True + past_key_values=past_kvs ) if shard_logits is not None: @@ -178,11 +186,20 @@ async def infer_tensor( self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id - return ( - input_ids.numpy(force=True), + if DEBUG >= 4: + print(f"\nshard_hidden_states: {shard_hidden_states}\n") + print(f"\nshard_past_kvs {shard_past_kvs}\n") + print(f"\nshard_logits: {shard_logits}") + + return_values = ( + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), json.dumps(cache_dict), is_finished ) + + print(f"return_values: {return_values}") + + return return_values async def ensure_shard(self, shard: Optional[Shard]): diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 066d643c1..d040418f8 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -100,8 +100,7 @@ def forward( input_ids: Optional[torch.tensor] = None, attention_mask: Optional[torch.tensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, - use_legacy_cache: Optional[bool] = False, - infer_tensor: Optional[bool] = False + use_legacy_cache: Optional[bool] = False ) -> Tuple[Optional[torch.tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.tensor]]: """ diff --git a/exo/networking/grpc/grpc_peer_handle.py b/exo/networking/grpc/grpc_peer_handle.py index 0629dc777..757de9fa0 100644 --- a/exo/networking/grpc/grpc_peer_handle.py +++ b/exo/networking/grpc/grpc_peer_handle.py @@ -11,6 +11,7 @@ from exo.topology.topology import Topology from exo.topology.device_capabilities import DeviceCapabilities +from exo.helpers import DEBUG class GRPCPeerHandle(PeerHandle): def __init__(self, _id: str, address: str, device_capabilities: DeviceCapabilities): @@ -52,6 +53,8 @@ async def send_prompt(self, shard: Shard, prompt: str, image_str: Optional[str] request_id=request_id, inference_state=inference_state, ) + + print(f"request: {request}") response = await self.stub.SendPrompt(request) if not response.tensor_data or not response.shape or not response.dtype: From e7a3fd0da3740ca832936c4691bf235e37108892 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 17:40:41 -0800 Subject: [PATCH 340/589] grpc testing --- exo/inference/pytorch/inference.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 
8a9f32a7b..fce410fcf 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -197,7 +197,8 @@ async def infer_tensor( is_finished ) - print(f"return_values: {return_values}") + if DEBUG >= 4: + print(f"return_values: {return_values}") return return_values From f6eec5ab5e6671bb928bc968cdba784d3cfdf22d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 18:00:06 -0800 Subject: [PATCH 341/589] grpc testing --- exo/inference/pytorch/inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index fce410fcf..cc6a48542 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -117,6 +117,7 @@ async def infer_prompt( cache_dict = None stopping_critera = self.stateful_sharded_model.stopping_critera + print("set stopping critera") self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id From d441a51d9ab317dc04bf544b6090ce59b7dea4a5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 18:03:29 -0800 Subject: [PATCH 342/589] grpc testing --- exo/inference/pytorch/inference.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index cc6a48542..27cf634e9 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -106,6 +106,10 @@ async def infer_prompt( if shard_logits is not None: next_token = self.stateful_sharded_model.logits_sample(shard_logits) self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) + + stopping_critera = self.stateful_sharded_model.stopping_critera + print("set stopping critera") + self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) input_ids = next_token if shard_past_kvs is not None: @@ -116,9 +120,6 @@ async def infer_prompt( else: cache_dict = None - stopping_critera = self.stateful_sharded_model.stopping_critera - print("set stopping critera") - self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id if is_finished: From e7f6dcb68227d7892c6b4d2889b725849bdd6bcb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 18:08:01 -0800 Subject: [PATCH 343/589] grpc testing --- exo/inference/pytorch/inference.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 27cf634e9..bb50dc0cf 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -106,10 +106,6 @@ async def infer_prompt( if shard_logits is not None: next_token = self.stateful_sharded_model.logits_sample(shard_logits) self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) - - stopping_critera = self.stateful_sharded_model.stopping_critera - print("set stopping critera") - self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) input_ids = next_token if shard_past_kvs is not None: @@ -120,11 +116,16 @@ async def infer_prompt( else: cache_dict = None + stopping_critera = self.stateful_sharded_model.stopping_critera + print("set stopping critera") + self.unfinished_sequences = 
self.unfinished_sequences & ~stopping_critera(input_ids, None) is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id if is_finished: self.past_input_ids = None + print(f"shard as numpy: {shard_hidden_states.numpy(force=True)}") + return_values = ( input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), json.dumps(cache_dict), From ba5b00566b8f2652cd9188dc9b26f00636a449f2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 18:37:35 -0800 Subject: [PATCH 344/589] grpc testing --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index bb50dc0cf..49991410c 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -124,7 +124,7 @@ async def infer_prompt( if is_finished: self.past_input_ids = None - print(f"shard as numpy: {shard_hidden_states.numpy(force=True)}") + print(f"shard as numpy: {shard_hidden_states.detach().cpu().numpy()}") return_values = ( input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), From 6242d762c113e65414c7e6a901316707944b9338 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 18:42:40 -0800 Subject: [PATCH 345/589] grpc testing --- exo/inference/pytorch/model/hf.py | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index d040418f8..61928fe32 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -212,24 +212,24 @@ def forward( # handle last layer to get logits # shard is last layer says true at the start and not detecting last layer correctly - if self.shard.is_last_layer(): - self.hidden_states = self.model.norm(self.hidden_states) - if use_legacy_cache: - self.past_key_values = self.next_decoder_cache.to_legacy_cache() - else: - self.past_key_values = self.next_decoder_cache - - # lm_head - logits = self.llm_model.lm_head(self.hidden_states).to(self.device) - - if DEBUG >= 4: - print(f"logits: {logits}") - - return ( - None, - None, - logits - ) + #if self.shard.is_last_layer(): + self.hidden_states = self.model.norm(self.hidden_states) + if use_legacy_cache: + self.past_key_values = self.next_decoder_cache.to_legacy_cache() + else: + self.past_key_values = self.next_decoder_cache + + # lm_head + logits = self.llm_model.lm_head(self.hidden_states).to(self.device) + + if DEBUG >= 4: + print(f"logits: {logits}") + + return ( + None, + None, + logits + ) if DEBUG >= 4: print(f"hidden_states: {self.hidden_states}") From 563073104a7376b9163cbae86278113498bf7275 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 16 Sep 2024 18:45:17 -0800 Subject: [PATCH 346/589] grpc testing --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 49991410c..f25b980d5 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -124,7 +124,7 @@ async def infer_prompt( if is_finished: self.past_input_ids = None - print(f"shard as numpy: {shard_hidden_states.detach().cpu().numpy()}") + #print(f"shard as numpy: {shard_hidden_states.detach().cpu().numpy()}") return_values = ( input_ids.numpy(force=True) if shard_logits is not None else 
shard_hidden_states.numpy(force=True), From 4a292680814039cd07d551ea78d56095cc179f49 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 00:46:21 -0800 Subject: [PATCH 347/589] testing passing hidden states in inference_state --- exo/inference/pytorch/inference.py | 55 +++++++----- exo/inference/pytorch/model/hf.py | 133 +++++++++++++++-------------- 2 files changed, 104 insertions(+), 84 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index f25b980d5..ff9f5c8d2 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -66,6 +66,8 @@ async def infer_prompt( ) -> Tuple[np.ndarray, str, bool]: if DEBUG >= 4: print("infer_prompt called") + print(f"prompt: {prompt}") + print(f"shard: {shard}") await self.ensure_shard(shard) @@ -103,6 +105,10 @@ async def infer_prompt( print(f"\nshard_past_kvs {shard_past_kvs}\n") print(f"\nshard_logits: {shard_logits}") + hidden_dict = None + if shard_hidden_states is not None: + hidden_dict = {"hidden_states": shard_hidden_states.tolist()} + if shard_logits is not None: next_token = self.stateful_sharded_model.logits_sample(shard_logits) self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) @@ -117,18 +123,15 @@ async def infer_prompt( cache_dict = None stopping_critera = self.stateful_sharded_model.stopping_critera - print("set stopping critera") self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id if is_finished: self.past_input_ids = None - #print(f"shard as numpy: {shard_hidden_states.detach().cpu().numpy()}") - return_values = ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps(cache_dict), + input_ids.numpy(force=True), #if shard_logits is not None else shard_hidden_states.numpy(force=True), + json.dumps([cache_dict, hidden_dict]), is_finished ) @@ -147,33 +150,47 @@ async def infer_tensor( if DEBUG >= 4: print("infer_tensor called") print(f"input_data: {input_data}") - print(f"input_data.size: {input_data.size}") - print(f"input_data.shape: {input_data.shape}") - print(f"shard: {self.shard}") + print(f"shard: {shard}") await self.ensure_shard(shard) - - input_ids = torch.tensor(input_data).long().to(self.device) - if self.past_input_ids is not None: - self.past_input_ids = torch.cat([self.past_input_ids, input_ids], dim=-1) - else: - self.past_input_ids = input_ids + infer_state = json.loads(inference_state) if inference_state else None - if inference_state is not None: - past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) + # if in the middle of generation, pass an empty (1,0) array + # while using hidden_states passed via inference_state + hidden_states = None + if input_data.shape == (1,0) and infer_state is not None: + # set hidden_states to input_ids + hidden_states = torch.tensor(infer_state[1]["hidden_states"]) + input_ids = torch.tensor([[]]).to(self.device) # empty tensor else: - past_kvs = None + input_ids = torch.tensor(input_data).long().to(self.device) + + if self.past_input_ids is not None: + self.past_input_ids = torch.cat([self.past_input_ids, input_ids], dim=-1) + else: + self.past_input_ids = input_ids + + if inference_state is not None: + past_kvs = DynamicCache.from_legacy_cache(infer_state[0]) + else: + past_kvs = None if DEBUG >= 4: print(f"input_ids: {input_ids}") 
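# A minimal, self-contained sketch (assuming plain float tensors and no
# dtype/device metadata carried on the wire) of the JSON round trip this patch
# uses to move hidden states between nodes inside inference_state: serialize
# with .tolist(), rebuild with torch.tensor().
#
#   import json
#   import torch
#   hidden = torch.randn(1, 4, 8)
#   wire = json.dumps({"hidden_states": hidden.tolist()})
#   restored = torch.tensor(json.loads(wire)["hidden_states"])
#   assert restored.shape == hidden.shape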
print(f"inference_state: {inference_state}") + print(f"infer_state: {infer_state}") shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( input_ids=self.past_input_ids, + hidden_states=hidden_states, past_key_values=past_kvs ) + hidden_dict = None + if shard_hidden_states is not None: + hidden_dict = {"hidden_states": shard_hidden_states.tolist()} + if shard_logits is not None: input_ids = self.stateful_sharded_model.logits_sample(shard_logits) @@ -195,8 +212,8 @@ async def infer_tensor( print(f"\nshard_logits: {shard_logits}") return_values = ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps(cache_dict), + input_ids.numpy(force=True), #if shard_logits is not None else shard_hidden_states.numpy(force=True), + json.dumps([cache_dict, hidden_dict]), is_finished ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 61928fe32..7898a5de3 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -98,6 +98,7 @@ def forward( self, shard: Optional[Shard] = None, input_ids: Optional[torch.tensor] = None, + hidden_states: Optional[torch.tensor] = None, attention_mask: Optional[torch.tensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, use_legacy_cache: Optional[bool] = False @@ -124,58 +125,60 @@ def forward( - logits: tensor Optional """ + if hidden_states is not None: + self.hidden_states = hidden_states + else: + self.input_ids = input_ids - self.input_ids = input_ids - - # embed input_ids - self.inputs_embeds = self.model.embed_tokens(self.input_ids) + # embed input_ids + self.inputs_embeds = self.model.embed_tokens(self.input_ids) - # cache - if past_key_values and not isinstance(past_key_values, Cache): - use_legacy_cache = True - past_key_values = DynamicCache.from_legacy_cache(past_key_values) - - past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + self.inputs_embeds.shape[1], - device=self.inputs_embeds.device - ) - - # position id - position_ids = cache_position.unsqueeze(0) - - # casual mask and attention_mask - self.attention_mask = attention_mask - self.causal_mask = self.model._update_causal_mask( - None, - self.inputs_embeds, - cache_position, - past_key_values, - False # dont out attentions - ) - - # embed positions, some models require and some dont - if isinstance(self.model, LlamaModel): - self.position_embeddings = self.model.rotary_emb( + # cache + if past_key_values and not isinstance(past_key_values, Cache): + use_legacy_cache = True + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + + past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + self.inputs_embeds.shape[1], + device=self.inputs_embeds.device + ) + + # position id + position_ids = cache_position.unsqueeze(0) + + # casual mask and attention_mask + self.attention_mask = attention_mask + self.causal_mask = self.model._update_causal_mask( + None, self.inputs_embeds, - position_ids + cache_position, + past_key_values, + False # dont out attentions ) - - # prepare inputs for decoder layers - model_inputs = self.llm_model.prepare_inputs_for_generation( - self.input_ids, - past_key_values=past_key_values, - attention_mask=self.attention_mask, - inputs_embeds=self.inputs_embeds, - 
position_ids=position_ids, - cache_position=cache_position - ) - self.hidden_states = self.inputs_embeds - self.position_ids = model_inputs["position_ids"] - self.cache_position = model_inputs["cache_position"] - self.past_key_values = model_inputs["past_key_values"] + # embed positions, some models require and some dont + if isinstance(self.model, LlamaModel): + self.position_embeddings = self.model.rotary_emb( + self.inputs_embeds, + position_ids + ) + + # prepare inputs for decoder layers + model_inputs = self.llm_model.prepare_inputs_for_generation( + self.input_ids, + past_key_values=past_key_values, + attention_mask=self.attention_mask, + inputs_embeds=self.inputs_embeds, + position_ids=position_ids, + cache_position=cache_position + ) + + self.hidden_states = self.inputs_embeds + self.position_ids = model_inputs["position_ids"] + self.cache_position = model_inputs["cache_position"] + self.past_key_values = model_inputs["past_key_values"] # run through decoder layers layer_amt = range(self.shard.start_layer, self.shard.end_layer + 1) @@ -212,24 +215,24 @@ def forward( # handle last layer to get logits # shard is last layer says true at the start and not detecting last layer correctly - #if self.shard.is_last_layer(): - self.hidden_states = self.model.norm(self.hidden_states) - if use_legacy_cache: - self.past_key_values = self.next_decoder_cache.to_legacy_cache() - else: - self.past_key_values = self.next_decoder_cache - - # lm_head - logits = self.llm_model.lm_head(self.hidden_states).to(self.device) - - if DEBUG >= 4: - print(f"logits: {logits}") - - return ( - None, - None, - logits - ) + if self.shard.is_last_layer(): + self.hidden_states = self.model.norm(self.hidden_states) + if use_legacy_cache: + self.past_key_values = self.next_decoder_cache.to_legacy_cache() + else: + self.past_key_values = self.next_decoder_cache + + # lm_head + logits = self.llm_model.lm_head(self.hidden_states).to(self.device) + + if DEBUG >= 4: + print(f"logits: {logits}") + + return ( + None, + None, + logits + ) if DEBUG >= 4: print(f"hidden_states: {self.hidden_states}") From 2daf65f78f15df57accc7b54582d4ba67c6afa93 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 00:52:42 -0800 Subject: [PATCH 348/589] testing passing hidden states in inference_state --- exo/inference/pytorch/inference.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index ff9f5c8d2..75017208f 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -126,8 +126,12 @@ async def infer_prompt( self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id - if is_finished: - self.past_input_ids = None + out_infer_state = json.dumps([cache_dict, hidden_dict]) + if DEBUG >= 4: + print(f"\nshard_hidden_states: {shard_hidden_states}\n") + print(f"\nshard_past_kvs {shard_past_kvs}\n") + print(f"\nshard_logits: {shard_logits}") + print(f"\nout_infer_state: {out_infer_state}") return_values = ( input_ids.numpy(force=True), #if shard_logits is not None else shard_hidden_states.numpy(force=True), @@ -206,14 +210,16 @@ async def infer_tensor( self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id + out_infer_state = 
json.dumps([cache_dict, hidden_dict]) if DEBUG >= 4: print(f"\nshard_hidden_states: {shard_hidden_states}\n") print(f"\nshard_past_kvs {shard_past_kvs}\n") print(f"\nshard_logits: {shard_logits}") + print(f"\nout_infer_state: {out_infer_state}") return_values = ( input_ids.numpy(force=True), #if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps([cache_dict, hidden_dict]), + out_infer_state, is_finished ) From 36d5cde3805e5faccee5d029dc4543271e84b59d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 01:17:15 -0800 Subject: [PATCH 349/589] fixing scalar issue, reversing passing hidden_states --- exo/inference/pytorch/inference.py | 64 ++++++++++++++---------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 75017208f..f4d3e323d 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -70,7 +70,7 @@ async def infer_prompt( print(f"shard: {shard}") await self.ensure_shard(shard) - + # setup prompt input messages = [{"role": "user", "content": prompt}] txt = self.tokenizer.apply_chat_template( @@ -90,7 +90,6 @@ async def infer_prompt( else: past_kvs = None - if DEBUG >= 4: print(f"input_ids: {input_ids}\n") @@ -105,10 +104,7 @@ async def infer_prompt( print(f"\nshard_past_kvs {shard_past_kvs}\n") print(f"\nshard_logits: {shard_logits}") - hidden_dict = None - if shard_hidden_states is not None: - hidden_dict = {"hidden_states": shard_hidden_states.tolist()} - + next_token = None if shard_logits is not None: next_token = self.stateful_sharded_model.logits_sample(shard_logits) self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) @@ -124,18 +120,22 @@ async def infer_prompt( stopping_critera = self.stateful_sharded_model.stopping_critera self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) - is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id - out_infer_state = json.dumps([cache_dict, hidden_dict]) + hit_eos = False + if next_token is not None: + hit_eos = next_token.item() == self.tokenizer.eos_token_id + + is_finished = self.unfinished_sequences.max() == 0 or hit_eos + if DEBUG >= 4: + print(f"\ninput_ids: {input_ids}") print(f"\nshard_hidden_states: {shard_hidden_states}\n") print(f"\nshard_past_kvs {shard_past_kvs}\n") print(f"\nshard_logits: {shard_logits}") - print(f"\nout_infer_state: {out_infer_state}") return_values = ( - input_ids.numpy(force=True), #if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps([cache_dict, hidden_dict]), + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), + json.dumps(cache_dict), is_finished ) @@ -158,27 +158,17 @@ async def infer_tensor( await self.ensure_shard(shard) - infer_state = json.loads(inference_state) if inference_state else None + input_ids = torch.tensor(input_data).long().to(self.device) - # if in the middle of generation, pass an empty (1,0) array - # while using hidden_states passed via inference_state - hidden_states = None - if input_data.shape == (1,0) and infer_state is not None: - # set hidden_states to input_ids - hidden_states = torch.tensor(infer_state[1]["hidden_states"]) - input_ids = torch.tensor([[]]).to(self.device) # empty tensor + if self.past_input_ids is not None: + self.past_input_ids = torch.cat([self.past_input_ids, input_ids], dim=-1) else: - 
input_ids = torch.tensor(input_data).long().to(self.device) + self.past_input_ids = input_ids - if self.past_input_ids is not None: - self.past_input_ids = torch.cat([self.past_input_ids, input_ids], dim=-1) - else: - self.past_input_ids = input_ids - - if inference_state is not None: - past_kvs = DynamicCache.from_legacy_cache(infer_state[0]) - else: - past_kvs = None + if inference_state is not None: + past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) + else: + past_kvs = None if DEBUG >= 4: print(f"input_ids: {input_ids}") @@ -195,8 +185,10 @@ async def infer_tensor( if shard_hidden_states is not None: hidden_dict = {"hidden_states": shard_hidden_states.tolist()} + next_token is not None if shard_logits is not None: - input_ids = self.stateful_sharded_model.logits_sample(shard_logits) + next_token = self.stateful_sharded_model.logits_sample(shard_logits) + input_ids = next_token if shard_past_kvs is not None: cache_dict = { @@ -208,18 +200,22 @@ async def infer_tensor( stopping_critera = self.stateful_sharded_model.stopping_critera self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) - is_finished = self.unfinished_sequences.max() == 0 or input_ids.item() == self.tokenizer.eos_token_id + + hit_eos = False + if next_token is not None: + hit_eos = next_token.item() == self.tokenizer.eos_token_id + + is_finished = self.unfinished_sequences.max() == 0 or hit_eos - out_infer_state = json.dumps([cache_dict, hidden_dict]) if DEBUG >= 4: + print(f"\ninput_ids: {input_ids}") print(f"\nshard_hidden_states: {shard_hidden_states}\n") print(f"\nshard_past_kvs {shard_past_kvs}\n") print(f"\nshard_logits: {shard_logits}") - print(f"\nout_infer_state: {out_infer_state}") return_values = ( input_ids.numpy(force=True), #if shard_logits is not None else shard_hidden_states.numpy(force=True), - out_infer_state, + json.dumps(cache_dict), is_finished ) From 6917f303b2b549e8ca4ec027231d199ea05b8b1a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 01:19:40 -0800 Subject: [PATCH 350/589] inference bug fix, grpc testing --- exo/inference/pytorch/inference.py | 1 - 1 file changed, 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index f4d3e323d..0d148bc86 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -173,7 +173,6 @@ async def infer_tensor( if DEBUG >= 4: print(f"input_ids: {input_ids}") print(f"inference_state: {inference_state}") - print(f"infer_state: {infer_state}") shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( input_ids=self.past_input_ids, From adab336431fb64483634e0c330f4a8acfae90f36 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 01:25:59 -0800 Subject: [PATCH 351/589] inference bug fix, grpc testing --- exo/inference/pytorch/inference.py | 1 - exo/inference/pytorch/model/hf.py | 95 +++++++++++++++--------------- 2 files changed, 46 insertions(+), 50 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 0d148bc86..e8920e083 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -176,7 +176,6 @@ async def infer_tensor( shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( input_ids=self.past_input_ids, - hidden_states=hidden_states, past_key_values=past_kvs ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 
7898a5de3..d040418f8 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -98,7 +98,6 @@ def forward( self, shard: Optional[Shard] = None, input_ids: Optional[torch.tensor] = None, - hidden_states: Optional[torch.tensor] = None, attention_mask: Optional[torch.tensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, use_legacy_cache: Optional[bool] = False @@ -125,60 +124,58 @@ def forward( - logits: tensor Optional """ - if hidden_states is not None: - self.hidden_states = hidden_states - else: - self.input_ids = input_ids - # embed input_ids - self.inputs_embeds = self.model.embed_tokens(self.input_ids) + self.input_ids = input_ids + + # embed input_ids + self.inputs_embeds = self.model.embed_tokens(self.input_ids) - # cache - if past_key_values and not isinstance(past_key_values, Cache): - use_legacy_cache = True - past_key_values = DynamicCache.from_legacy_cache(past_key_values) - - past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + self.inputs_embeds.shape[1], - device=self.inputs_embeds.device - ) - - # position id - position_ids = cache_position.unsqueeze(0) + # cache + if past_key_values and not isinstance(past_key_values, Cache): + use_legacy_cache = True + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + + past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + self.inputs_embeds.shape[1], + device=self.inputs_embeds.device + ) + + # position id + position_ids = cache_position.unsqueeze(0) + + # casual mask and attention_mask + self.attention_mask = attention_mask + self.causal_mask = self.model._update_causal_mask( + None, + self.inputs_embeds, + cache_position, + past_key_values, + False # dont out attentions + ) - # casual mask and attention_mask - self.attention_mask = attention_mask - self.causal_mask = self.model._update_causal_mask( - None, + # embed positions, some models require and some dont + if isinstance(self.model, LlamaModel): + self.position_embeddings = self.model.rotary_emb( self.inputs_embeds, - cache_position, - past_key_values, - False # dont out attentions - ) - - # embed positions, some models require and some dont - if isinstance(self.model, LlamaModel): - self.position_embeddings = self.model.rotary_emb( - self.inputs_embeds, - position_ids - ) - - # prepare inputs for decoder layers - model_inputs = self.llm_model.prepare_inputs_for_generation( - self.input_ids, - past_key_values=past_key_values, - attention_mask=self.attention_mask, - inputs_embeds=self.inputs_embeds, - position_ids=position_ids, - cache_position=cache_position + position_ids ) + + # prepare inputs for decoder layers + model_inputs = self.llm_model.prepare_inputs_for_generation( + self.input_ids, + past_key_values=past_key_values, + attention_mask=self.attention_mask, + inputs_embeds=self.inputs_embeds, + position_ids=position_ids, + cache_position=cache_position + ) - self.hidden_states = self.inputs_embeds - self.position_ids = model_inputs["position_ids"] - self.cache_position = model_inputs["cache_position"] - self.past_key_values = model_inputs["past_key_values"] + self.hidden_states = self.inputs_embeds + self.position_ids = model_inputs["position_ids"] + self.cache_position = model_inputs["cache_position"] + self.past_key_values = model_inputs["past_key_values"] # run through 
decoder layers layer_amt = range(self.shard.start_layer, self.shard.end_layer + 1) From 73146dd0c862f18bfda73c5068482fb079b70022 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 01:47:12 -0800 Subject: [PATCH 352/589] fixing hf model for hidden_states --- exo/inference/pytorch/model/hf.py | 98 ++++++++++++++++--------------- 1 file changed, 51 insertions(+), 47 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index d040418f8..c734f5daa 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -98,6 +98,7 @@ def forward( self, shard: Optional[Shard] = None, input_ids: Optional[torch.tensor] = None, + hidden_states: Optional[torch.tensor] = None, attention_mask: Optional[torch.tensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, use_legacy_cache: Optional[bool] = False @@ -124,58 +125,61 @@ def forward( - logits: tensor Optional """ - - self.input_ids = input_ids - - # embed input_ids - self.inputs_embeds = self.model.embed_tokens(self.input_ids) - - # cache - if past_key_values and not isinstance(past_key_values, Cache): - use_legacy_cache = True - past_key_values = DynamicCache.from_legacy_cache(past_key_values) - - past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + self.inputs_embeds.shape[1], - device=self.inputs_embeds.device - ) - # position id - position_ids = cache_position.unsqueeze(0) - - # casual mask and attention_mask - self.attention_mask = attention_mask - self.causal_mask = self.model._update_causal_mask( - None, - self.inputs_embeds, - cache_position, - past_key_values, - False # dont out attentions - ) + if hidden_states is not None: + self.hidden_states = hidden_states + else: + self.input_ids = input_ids + + # embed input_ids + self.inputs_embeds = self.model.embed_tokens(self.input_ids) + + # cache + if past_key_values and not isinstance(past_key_values, Cache): + use_legacy_cache = True + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + + past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + self.inputs_embeds.shape[1], + device=self.inputs_embeds.device + ) + + # position id + position_ids = cache_position.unsqueeze(0) - # embed positions, some models require and some dont - if isinstance(self.model, LlamaModel): - self.position_embeddings = self.model.rotary_emb( + # casual mask and attention_mask + self.attention_mask = attention_mask + self.causal_mask = self.model._update_causal_mask( + None, self.inputs_embeds, - position_ids + cache_position, + past_key_values, + False # dont out attentions + ) + + # embed positions, some models require and some dont + if isinstance(self.model, LlamaModel): + self.position_embeddings = self.model.rotary_emb( + self.inputs_embeds, + position_ids + ) + + # prepare inputs for decoder layers + model_inputs = self.llm_model.prepare_inputs_for_generation( + self.input_ids, + past_key_values=past_key_values, + attention_mask=self.attention_mask, + inputs_embeds=self.inputs_embeds, + position_ids=position_ids, + cache_position=cache_position ) - - # prepare inputs for decoder layers - model_inputs = self.llm_model.prepare_inputs_for_generation( - self.input_ids, - past_key_values=past_key_values, - attention_mask=self.attention_mask, - inputs_embeds=self.inputs_embeds, 
- position_ids=position_ids, - cache_position=cache_position - ) - self.hidden_states = self.inputs_embeds - self.position_ids = model_inputs["position_ids"] - self.cache_position = model_inputs["cache_position"] - self.past_key_values = model_inputs["past_key_values"] + self.hidden_states = self.inputs_embeds + self.position_ids = model_inputs["position_ids"] + self.cache_position = model_inputs["cache_position"] + self.past_key_values = model_inputs["past_key_values"] # run through decoder layers layer_amt = range(self.shard.start_layer, self.shard.end_layer + 1) From 929386d0049baf1c348c1e1dad7cb194f49c3632 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 01:49:19 -0800 Subject: [PATCH 353/589] fixing hf model for hidden_states --- exo/inference/pytorch/inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index e8920e083..0d148bc86 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -176,6 +176,7 @@ async def infer_tensor( shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( input_ids=self.past_input_ids, + hidden_states=hidden_states, past_key_values=past_kvs ) From 32b8f67af5d426011d536b54589b2e8888548135 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 01:58:13 -0800 Subject: [PATCH 354/589] fixing hf model for hidden_states --- exo/inference/pytorch/inference.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 0d148bc86..683a81237 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -160,10 +160,15 @@ async def infer_tensor( input_ids = torch.tensor(input_data).long().to(self.device) - if self.past_input_ids is not None: - self.past_input_ids = torch.cat([self.past_input_ids, input_ids], dim=-1) + # detect if hidden_states or not + hidden_states = None + if input_ids.size()[-1] > 1: + hidden_states = input_ids else: - self.past_input_ids = input_ids + if self.past_input_ids is not None: + self.past_input_ids = torch.cat([self.past_input_ids, input_ids], dim=-1) + else: + self.past_input_ids = input_ids if inference_state is not None: past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) From c86facb91529d2f883ed0b06a0b97bd9dc278d87 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 02:01:33 -0800 Subject: [PATCH 355/589] fixing hf model for hidden_states --- exo/inference/pytorch/model/hf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index c734f5daa..edd105e53 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -126,6 +126,8 @@ def forward( """ + model_inputs = None + if hidden_states is not None: self.hidden_states = hidden_states else: From d15b20d551df00821516f9bd3bc1945ace10feaf Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 02:03:17 -0800 Subject: [PATCH 356/589] fixing hf model for hidden_states --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 683a81237..cefd01700 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -189,7 +189,7 @@ async def infer_tensor( if shard_hidden_states is not None: hidden_dict = 
{"hidden_states": shard_hidden_states.tolist()} - next_token is not None + next_token = None if shard_logits is not None: next_token = self.stateful_sharded_model.logits_sample(shard_logits) input_ids = next_token From 5e41bc4d5315e2664cb9f0a1bfe3565fa7c66df3 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 02:06:33 -0800 Subject: [PATCH 357/589] fixing hf model for hidden_states --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index cefd01700..b2414677b 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -113,7 +113,7 @@ async def infer_prompt( if shard_past_kvs is not None: cache_dict = { 'key_cache': [tensor.tolist() for tensor in shard_past_kvs.key_cache], - 'value_cache': [tensor.tolist() for tensor in shard_past_kvs_kvs.value_cache] + 'value_cache': [tensor.tolist() for tensor in shard_past_kvs.value_cache] } else: cache_dict = None @@ -197,7 +197,7 @@ async def infer_tensor( if shard_past_kvs is not None: cache_dict = { 'key_cache': [tensor.tolist() for tensor in shard_past_kvs.key_cache], - 'value_cache': [tensor.tolist() for tensor in shard_past_kvs_kvs.value_cache] + 'value_cache': [tensor.tolist() for tensor in shard_past_kvs.value_cache] } else: cache_dict = None From b29c5f807aa0b5c37db0cae3046720f3a6137710 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 02:21:30 -0800 Subject: [PATCH 358/589] fixing hf model for hidden_states --- exo/inference/pytorch/inference.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index b2414677b..033e02f1a 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -68,6 +68,7 @@ async def infer_prompt( print("infer_prompt called") print(f"prompt: {prompt}") print(f"shard: {shard}") + print(f"inference_state: {inference_state}") await self.ensure_shard(shard) @@ -118,14 +119,9 @@ async def infer_prompt( else: cache_dict = None - stopping_critera = self.stateful_sharded_model.stopping_critera - self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) - - hit_eos = False + is_finished = False if next_token is not None: - hit_eos = next_token.item() == self.tokenizer.eos_token_id - - is_finished = self.unfinished_sequences.max() == 0 or hit_eos + is_finished = next_token.item() == self.tokenizer.eos_token_id if DEBUG >= 4: print(f"\ninput_ids: {input_ids}") @@ -155,6 +151,7 @@ async def infer_tensor( print("infer_tensor called") print(f"input_data: {input_data}") print(f"shard: {shard}") + print(f"inference_state: {inference_state}") await self.ensure_shard(shard) @@ -202,14 +199,14 @@ async def infer_tensor( else: cache_dict = None - stopping_critera = self.stateful_sharded_model.stopping_critera - self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) + #stopping_critera = self.stateful_sharded_model.stopping_critera + #self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) - hit_eos = False + is_finished = False if next_token is not None: - hit_eos = next_token.item() == self.tokenizer.eos_token_id + is_finished = next_token.item() == self.tokenizer.eos_token_id - is_finished = self.unfinished_sequences.max() == 0 or hit_eos + #is_finished = self.unfinished_sequences.max() == 0 or hit_eos if 
DEBUG >= 4: print(f"\ninput_ids: {input_ids}") From ddaa79c5354cc05595756cf97645be033efc5b06 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 02:32:24 -0800 Subject: [PATCH 359/589] fixing kvcache issue --- exo/inference/pytorch/inference.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 033e02f1a..4c334f3a2 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -87,7 +87,11 @@ async def infer_prompt( if inference_state is not None: - past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) + infer_state = json.loads(inference_state) + if len(infer_state["key_cache"]) == 0: + past_kvs = DynamicCache() + else: + past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) else: past_kvs = None @@ -168,7 +172,11 @@ async def infer_tensor( self.past_input_ids = input_ids if inference_state is not None: - past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) + infer_state = json.loads(inference_state) + if len(infer_state["key_cache"]) == 0: + past_kvs = DynamicCache() + else: + past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) else: past_kvs = None From 3164d3859aab1f8f20674495b6eb8b2a14afc8fb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 02:36:53 -0800 Subject: [PATCH 360/589] fixing kvcache issue --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 4c334f3a2..a2f7efb4f 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -88,7 +88,7 @@ async def infer_prompt( if inference_state is not None: infer_state = json.loads(inference_state) - if len(infer_state["key_cache"]) == 0: + if not infer_state: past_kvs = DynamicCache() else: past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) @@ -173,7 +173,7 @@ async def infer_tensor( if inference_state is not None: infer_state = json.loads(inference_state) - if len(infer_state["key_cache"]) == 0: + if not infer_state: past_kvs = DynamicCache() else: past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) From e8532bc5934f30d41c500b6c1d16549799c921bf Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 02:43:03 -0800 Subject: [PATCH 361/589] fixing kvcache issue --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index a2f7efb4f..d98717b99 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -88,7 +88,7 @@ async def infer_prompt( if inference_state is not None: infer_state = json.loads(inference_state) - if not infer_state: + if not infer_state or (infer_state and len(infer_state["key_cache"] == 0)): past_kvs = DynamicCache() else: past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) @@ -173,7 +173,7 @@ async def infer_tensor( if inference_state is not None: infer_state = json.loads(inference_state) - if not infer_state: + if not infer_state or (infer_state and len(infer_state["key_cache"] == 0)): past_kvs = DynamicCache() else: past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) From 6a5b8db2d657d344e3fa4b56139f4442807888c4 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 
02:45:16 -0800 Subject: [PATCH 362/589] fixing kvcache issue --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index d98717b99..ecb027983 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -88,7 +88,7 @@ async def infer_prompt( if inference_state is not None: infer_state = json.loads(inference_state) - if not infer_state or (infer_state and len(infer_state["key_cache"] == 0)): + if not infer_state or (infer_state and len(infer_state["key_cache"]) == 0): past_kvs = DynamicCache() else: past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) @@ -173,7 +173,7 @@ async def infer_tensor( if inference_state is not None: infer_state = json.loads(inference_state) - if not infer_state or (infer_state and len(infer_state["key_cache"] == 0)): + if not infer_state or (infer_state and len(infer_state["key_cache"]) == 0): past_kvs = DynamicCache() else: past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) From 515687d76c7eb951441f1ca0da06f64994358d36 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 17 Sep 2024 23:38:39 -0800 Subject: [PATCH 363/589] working on passing past input_ids between infers and nodes --- exo/inference/pytorch/inference.py | 54 +++++++++++++++++++++++------- exo/inference/pytorch/model/hf.py | 9 ++++- 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index ecb027983..7fc09a759 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -161,25 +161,48 @@ async def infer_tensor( input_ids = torch.tensor(input_data).long().to(self.device) + # setup cache and cached input_ids + past_kvs = None + past_iids = None + cached_iids = None + if inference_state is not None: + try: + infer_state = json.loads(inference_state) + except ValueError: + infer_state = None + + if infer_state is not None: + # setup cache + cached_kvs = infer_state[0] + if not cached_kvs or (cached_kvs and len(cached_kvs["key_cache"]) == 0): + past_kvs = DynamicCache() + else: + past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) + + # setup cached input_ids with one coming in, if any cached + cached_iids = infer_state[1] + if cached_iids is not None: + past_iids = None + if len(cached_iids) > 0: + cached_iids_tensor = torch.tensor(cached_iids["input_ids"]).to(self.device) + past_iids = torch.cat([cached_iids_tensor, input_ids], dim=-1).to(self.device) + cached_iids = {"input_ids": past_iids.tolist()} + + if DEBUG >= 4: + print(f"past_kvs: {past_kvs}") + print(f"cached_iids: {cached_iids}") + # detect if hidden_states or not hidden_states = None if input_ids.size()[-1] > 1: hidden_states = input_ids + self.past_input_ids = None else: - if self.past_input_ids is not None: - self.past_input_ids = torch.cat([self.past_input_ids, input_ids], dim=-1) + if past_iids is not None: + self.past_input_ids = past_iids else: self.past_input_ids = input_ids - - if inference_state is not None: - infer_state = json.loads(inference_state) - if not infer_state or (infer_state and len(infer_state["key_cache"]) == 0): - past_kvs = DynamicCache() - else: - past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) - else: - past_kvs = None - + if DEBUG >= 4: print(f"input_ids: {input_ids}") print(f"inference_state: {inference_state}") @@ -199,6 +222,7 @@ async def infer_tensor( 
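[Editor's aside — illustrative sketch only, not part of patch 363.] This patch starts threading two pieces of state through inference_state: a KV-cache dict and the accumulated input_ids, serialized as the JSON list [cache_dict, cached_iids] (see the json.dumps([cache_dict, cached_iids]) change just below). A minimal round-trip under that assumed layout; the helper names pack_state and unpack_state are hypothetical and only rely on json and torch:

import json
import torch

def pack_state(cache_dict, past_iids: torch.Tensor) -> str:
  # serialize the KV-cache dict plus the accumulated input_ids for the next node / infer call
  return json.dumps([cache_dict, {"input_ids": past_iids.tolist()}])

def unpack_state(inference_state, new_ids: torch.Tensor, device="cpu"):
  # defensively parse the JSON and rebuild the running input_ids tensor
  # (assumes new_ids has the same batch shape as the cached ids)
  try:
    cache_dict, cached_iids = json.loads(inference_state)
  except (TypeError, ValueError):
    return None, new_ids
  if cached_iids and len(cached_iids.get("input_ids", [])) > 0:
    past_iids = torch.tensor(cached_iids["input_ids"]).to(device)
    return cache_dict, torch.cat([past_iids, new_ids], dim=-1)
  return cache_dict, new_ids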
next_token = self.stateful_sharded_model.logits_sample(shard_logits) input_ids = next_token + #cache if shard_past_kvs is not None: cache_dict = { 'key_cache': [tensor.tolist() for tensor in shard_past_kvs.key_cache], @@ -207,6 +231,10 @@ async def infer_tensor( else: cache_dict = None + if self.past_input_ids is not None: + next_cached_logits = torch.cat([self.past_input_ids, input_ids], dim=-1).to(self.device) + cached_iids = {"input_ids": next_cached_logits.tolist()} + #stopping_critera = self.stateful_sharded_model.stopping_critera #self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) @@ -224,7 +252,7 @@ async def infer_tensor( return_values = ( input_ids.numpy(force=True), #if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps(cache_dict), + json.dumps([cache_dict, cached_iids]), is_finished ) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index edd105e53..62e30a081 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -101,7 +101,7 @@ def forward( hidden_states: Optional[torch.tensor] = None, attention_mask: Optional[torch.tensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, - use_legacy_cache: Optional[bool] = False + use_legacy_cache: bool = False ) -> Tuple[Optional[torch.tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.tensor]]: """ @@ -198,6 +198,11 @@ def forward( print(f"decoder_layer: {decoder_layer}") print(f"hidden_states: {self.hidden_states}") + # TODO: fix caching as decoder layer is not returning + # present_key_value from attention layer on models + # might have some other generation functions needed to do it + # see https://github.com/huggingface/transformers/blob/main/src/transformers/generation/utils.py#L2917 + # for qwen2 exhttps://github.com/huggingface/transformers/blob/main/src/transformers/models/qwen2/modeling_qwen2.py#L291 layer_outputs = decoder_layer( self.hidden_states, attention_mask=self.causal_mask, @@ -212,6 +217,8 @@ def forward( if DEBUG >= 5: print("decoder_layer after") + print(f"layer_outputs: {layer_outputs}\n") + print(f"self.next_decoder_cache: {self.next_decoder_cache}") print(f"hidden_states: {self.hidden_states}") print(f"next_decoder_cache: {self.next_decoder_cache}") From 92ebdd5f0d1bf464dfe53cc671c18fa33cbc309b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 19 Sep 2024 03:34:03 -0800 Subject: [PATCH 364/589] implemented infer caching and passing cache information via inference_state --- exo/inference/pytorch/inference.py | 110 +++++++++++++++++------------ exo/inference/pytorch/model/hf.py | 5 +- 2 files changed, 67 insertions(+), 48 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 7fc09a759..302597a34 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -9,7 +9,7 @@ from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel from exo.api.chatgpt_api import resolve_tokenizer from exo.helpers import DEBUG -from transformers import DynamicCache +from transformers import DynamicCache, Cache from accelerate import disk_offload from exo.download.shard_download import ShardDownloader @@ -56,6 +56,47 @@ def __init__(self, shard_downloader: ShardDownloader): # setup unfinished sequence self.unfinished_sequences = torch.ones(1, dtype=torch.long, device=self.device) + def infer_caching( + self, + inference_state: Optional[str] = None + ) -> 
Tuple[Optional[Cache], Optional[torch.tensor], Optional[dict]]: + """ + inference caching for past_kvs and cached input_ids + user json inference_state + """ + # setup cache and cached input_ids + past_kvs = None + past_iids = None + cached_iids = None + if inference_state is not None: + try: + infer_state = json.loads(inference_state) + except ValueError: + infer_state = None + + if infer_state is not None: + # setup cache + cached_kvs = infer_state[0] + if not cached_kvs or (cached_kvs and len(cached_kvs["key_cache"]) == 0): + past_kvs = DynamicCache() + else: + past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) + + # setup cached input_ids with one coming in, if any cached + cached_iids = infer_state[1] + if cached_iids is not None: + past_iids = None + if len(cached_iids) > 0: + past_iids = torch.tensor(cached_iids["input_ids"]).to(self.device) + cached_iids = {"input_ids": past_iids.tolist()} + + if DEBUG >= 4: + print(f"past_kvs: {past_kvs}") + print(f"cached_iids: {cached_iids}") + + return (past_kvs, past_iids, cached_iids) + + async def infer_prompt( self, request_id: str, @@ -86,20 +127,19 @@ async def infer_prompt( batch_size, seq_length = input_ids.shape[:2] - if inference_state is not None: - infer_state = json.loads(inference_state) - if not infer_state or (infer_state and len(infer_state["key_cache"]) == 0): - past_kvs = DynamicCache() - else: - past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) + # get cache from inference_state + past_kvs, past_iids, cached_iids = self.infer_caching(inference_state) + + if past_iids is not None: + self.past_input_ids = past_iids, else: - past_kvs = None + self.past_input_ids = input_ids if DEBUG >= 4: - print(f"input_ids: {input_ids}\n") + print(f"past_input_ids: {self.past_input_ids}\n") shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( - input_ids=input_ids, + input_ids=self.past_input_ids, attention_mask=input_attention_mask, past_key_values=past_kvs ) @@ -115,6 +155,7 @@ async def infer_prompt( self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) input_ids = next_token + # cache if shard_past_kvs is not None: cache_dict = { 'key_cache': [tensor.tolist() for tensor in shard_past_kvs.key_cache], @@ -123,6 +164,9 @@ async def infer_prompt( else: cache_dict = None + if self.past_input_ids is not None: + cached_iids = {"input_ids": self.past_input_ids.tolist()} + is_finished = False if next_token is not None: is_finished = next_token.item() == self.tokenizer.eos_token_id @@ -135,7 +179,7 @@ async def infer_prompt( return_values = ( input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps(cache_dict), + json.dumps([cache_dict, cached_iids]), is_finished ) @@ -161,47 +205,19 @@ async def infer_tensor( input_ids = torch.tensor(input_data).long().to(self.device) - # setup cache and cached input_ids - past_kvs = None - past_iids = None - cached_iids = None - if inference_state is not None: - try: - infer_state = json.loads(inference_state) - except ValueError: - infer_state = None - - if infer_state is not None: - # setup cache - cached_kvs = infer_state[0] - if not cached_kvs or (cached_kvs and len(cached_kvs["key_cache"]) == 0): - past_kvs = DynamicCache() - else: - past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) - - # setup cached input_ids with one coming in, if any cached - cached_iids = infer_state[1] - if cached_iids is not None: - past_iids = None - 
if len(cached_iids) > 0: - cached_iids_tensor = torch.tensor(cached_iids["input_ids"]).to(self.device) - past_iids = torch.cat([cached_iids_tensor, input_ids], dim=-1).to(self.device) - cached_iids = {"input_ids": past_iids.tolist()} - - if DEBUG >= 4: - print(f"past_kvs: {past_kvs}") - print(f"cached_iids: {cached_iids}") - + # get cache from inference_state + past_kvs, past_iids, cached_iids = self.infer_caching(inference_state) + # detect if hidden_states or not hidden_states = None if input_ids.size()[-1] > 1: hidden_states = input_ids - self.past_input_ids = None + #self.past_input_ids = None + #else: + if past_iids is not None: + self.past_input_ids = past_iids else: - if past_iids is not None: - self.past_input_ids = past_iids - else: - self.past_input_ids = input_ids + self.past_input_ids = input_ids if DEBUG >= 4: print(f"input_ids: {input_ids}") diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 62e30a081..1481c40cc 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -127,6 +127,7 @@ def forward( """ model_inputs = None + self.hidden_states = None if hidden_states is not None: self.hidden_states = hidden_states @@ -183,12 +184,14 @@ def forward( self.cache_position = model_inputs["cache_position"] self.past_key_values = model_inputs["past_key_values"] + if DEBUG >= 4: + print(f"model_inputs: {model_inputs}") + # run through decoder layers layer_amt = range(self.shard.start_layer, self.shard.end_layer + 1) if DEBUG >= 4: print(f"hidden_states: {self.hidden_states}") - print(f"model_inputs: {model_inputs}") print(f"layer_amt: {layer_amt}") for i in layer_amt: From f0795bd17cfa1cbf1e491c1dfc2da7cb3a6d0824 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 19 Sep 2024 03:59:15 -0800 Subject: [PATCH 365/589] removing dynamic cache passing in inference_state as model does its own, added cleaning out cached_iids when process is finished --- exo/inference/pytorch/inference.py | 76 ++++++++++-------------------- 1 file changed, 24 insertions(+), 52 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 302597a34..fa16ccb5e 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -59,13 +59,11 @@ def __init__(self, shard_downloader: ShardDownloader): def infer_caching( self, inference_state: Optional[str] = None - ) -> Tuple[Optional[Cache], Optional[torch.tensor], Optional[dict]]: + ) -> Tuple[Optional[torch.tensor], Optional[dict]]: """ - inference caching for past_kvs and cached input_ids - user json inference_state + inference caching from inference_state json """ # setup cache and cached input_ids - past_kvs = None past_iids = None cached_iids = None if inference_state is not None: @@ -75,15 +73,7 @@ def infer_caching( infer_state = None if infer_state is not None: - # setup cache - cached_kvs = infer_state[0] - if not cached_kvs or (cached_kvs and len(cached_kvs["key_cache"]) == 0): - past_kvs = DynamicCache() - else: - past_kvs = DynamicCache.from_legacy_cache(json.loads(inference_state)) - - # setup cached input_ids with one coming in, if any cached - cached_iids = infer_state[1] + cached_iids = infer_state["cached_iids"] if cached_iids is not None: past_iids = None if len(cached_iids) > 0: @@ -91,10 +81,9 @@ def infer_caching( cached_iids = {"input_ids": past_iids.tolist()} if DEBUG >= 4: - print(f"past_kvs: {past_kvs}") print(f"cached_iids: {cached_iids}") - return (past_kvs, past_iids, cached_iids) + return 
(past_iids, cached_iids) async def infer_prompt( @@ -126,9 +115,8 @@ async def infer_prompt( input_attention_mask = inputs.attention_mask.to(self.device) batch_size, seq_length = input_ids.shape[:2] - # get cache from inference_state - past_kvs, past_iids, cached_iids = self.infer_caching(inference_state) + past_iids, cached_iids = self.infer_caching(inference_state) if past_iids is not None: self.past_input_ids = past_iids, @@ -140,8 +128,7 @@ async def infer_prompt( shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( input_ids=self.past_input_ids, - attention_mask=input_attention_mask, - past_key_values=past_kvs + attention_mask=input_attention_mask ) if DEBUG >= 4: @@ -155,15 +142,6 @@ async def infer_prompt( self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) input_ids = next_token - # cache - if shard_past_kvs is not None: - cache_dict = { - 'key_cache': [tensor.tolist() for tensor in shard_past_kvs.key_cache], - 'value_cache': [tensor.tolist() for tensor in shard_past_kvs.value_cache] - } - else: - cache_dict = None - if self.past_input_ids is not None: cached_iids = {"input_ids": self.past_input_ids.tolist()} @@ -171,6 +149,10 @@ async def infer_prompt( if next_token is not None: is_finished = next_token.item() == self.tokenizer.eos_token_id + if is_finished: + # clear cache + cached_iids = {"input_ids": []} + if DEBUG >= 4: print(f"\ninput_ids: {input_ids}") print(f"\nshard_hidden_states: {shard_hidden_states}\n") @@ -179,7 +161,7 @@ async def infer_prompt( return_values = ( input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps([cache_dict, cached_iids]), + json.dumps({"cached_iids": cached_iids}), is_finished ) @@ -206,18 +188,18 @@ async def infer_tensor( input_ids = torch.tensor(input_data).long().to(self.device) # get cache from inference_state - past_kvs, past_iids, cached_iids = self.infer_caching(inference_state) + past_iids, cached_iids = self.infer_caching(inference_state) # detect if hidden_states or not hidden_states = None + self.past_input_ids = None if input_ids.size()[-1] > 1: hidden_states = input_ids - #self.past_input_ids = None - #else: - if past_iids is not None: - self.past_input_ids = past_iids else: - self.past_input_ids = input_ids + if past_iids is not None: + self.past_input_ids = past_iids + else: + self.past_input_ids = input_ids if DEBUG >= 4: print(f"input_ids: {input_ids}") @@ -225,8 +207,7 @@ async def infer_tensor( shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( input_ids=self.past_input_ids, - hidden_states=hidden_states, - past_key_values=past_kvs + hidden_states=hidden_states ) hidden_dict = None @@ -238,27 +219,18 @@ async def infer_tensor( next_token = self.stateful_sharded_model.logits_sample(shard_logits) input_ids = next_token - #cache - if shard_past_kvs is not None: - cache_dict = { - 'key_cache': [tensor.tolist() for tensor in shard_past_kvs.key_cache], - 'value_cache': [tensor.tolist() for tensor in shard_past_kvs.value_cache] - } - else: - cache_dict = None - + #cache if self.past_input_ids is not None: next_cached_logits = torch.cat([self.past_input_ids, input_ids], dim=-1).to(self.device) cached_iids = {"input_ids": next_cached_logits.tolist()} - #stopping_critera = self.stateful_sharded_model.stopping_critera - #self.unfinished_sequences = self.unfinished_sequences & ~stopping_critera(input_ids, None) - is_finished = False if next_token is not None: is_finished = 
next_token.item() == self.tokenizer.eos_token_id - #is_finished = self.unfinished_sequences.max() == 0 or hit_eos + if is_finished: + # clear cache + cached_iids = {"input_ids": []} if DEBUG >= 4: print(f"\ninput_ids: {input_ids}") @@ -267,8 +239,8 @@ async def infer_tensor( print(f"\nshard_logits: {shard_logits}") return_values = ( - input_ids.numpy(force=True), #if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps([cache_dict, cached_iids]), + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), + json.dumps({"cached_iids": cached_iids}), is_finished ) From b8f15a0e66c6a7a820b116f50b945cdafb8ea953 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 19 Sep 2024 04:00:14 -0800 Subject: [PATCH 366/589] removed clearning cache on infer prompt and only on finished infer tensor --- exo/inference/pytorch/inference.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index fa16ccb5e..441fc3ed2 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -149,10 +149,6 @@ async def infer_prompt( if next_token is not None: is_finished = next_token.item() == self.tokenizer.eos_token_id - if is_finished: - # clear cache - cached_iids = {"input_ids": []} - if DEBUG >= 4: print(f"\ninput_ids: {input_ids}") print(f"\nshard_hidden_states: {shard_hidden_states}\n") From d0f3cb77022dc024b1bfc9af58e729c0e5b2166b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 19 Sep 2024 04:08:36 -0800 Subject: [PATCH 367/589] hidden state dropping between nodes issue --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 441fc3ed2..835ee54a2 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -181,7 +181,7 @@ async def infer_tensor( await self.ensure_shard(shard) - input_ids = torch.tensor(input_data).long().to(self.device) + input_ids = torch.tensor(input_data).to(self.device) # get cache from inference_state past_iids, cached_iids = self.infer_caching(inference_state) From fa6f26350a7fcf524f49f9008b7f08e87d876d19 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 19 Sep 2024 04:22:04 -0800 Subject: [PATCH 368/589] hidden state dropping between nodes issue --- exo/inference/pytorch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 835ee54a2..0e23e9728 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -216,8 +216,8 @@ async def infer_tensor( input_ids = next_token #cache - if self.past_input_ids is not None: - next_cached_logits = torch.cat([self.past_input_ids, input_ids], dim=-1).to(self.device) + if next_token is not None: + next_cached_logits = torch.cat([self.past_input_ids, next_token], dim=-1).to(self.device) cached_iids = {"input_ids": next_cached_logits.tolist()} is_finished = False From 2b0e7b56f72b64e99e0ee32d65ed72bff6baf6e2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 19 Sep 2024 04:26:45 -0800 Subject: [PATCH 369/589] hidden state dropping between nodes issue --- exo/inference/pytorch/inference.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 0e23e9728..f3036e788 100644 --- 
a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -217,7 +217,11 @@ async def infer_tensor( #cache if next_token is not None: - next_cached_logits = torch.cat([self.past_input_ids, next_token], dim=-1).to(self.device) + if self.past_input_ids is not None: + next_cached_logits = torch.cat([self.past_input_ids, next_token], dim=-1).to(self.device) + elif past_iids is not None: + next_cached_logits = torch.cat([past_iids, next_token], dim=-1).to(self.device) + cached_iids = {"input_ids": next_cached_logits.tolist()} is_finished = False From cee3e311809390c8d36c0360f3b5a763acb58b99 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 2 Oct 2024 10:37:17 -0800 Subject: [PATCH 370/589] cleaning up code, removing helpers.py --- exo/inference/pytorch/helpers.py | 24 --- exo/inference/pytorch/inference.py | 4 +- .../pytorch/model/archive/hf_manual.py | 203 ------------------ exo/inference/pytorch/model/hf.py | 28 +-- 4 files changed, 2 insertions(+), 257 deletions(-) delete mode 100644 exo/inference/pytorch/helpers.py delete mode 100644 exo/inference/pytorch/model/archive/hf_manual.py diff --git a/exo/inference/pytorch/helpers.py b/exo/inference/pytorch/helpers.py deleted file mode 100644 index addea2db7..000000000 --- a/exo/inference/pytorch/helpers.py +++ /dev/null @@ -1,24 +0,0 @@ -# Helper functions for pytorch inference -# Some code coming from tinygrad but written towards pytorch - -import asyncio -import aiohttp -from tqdm import tqdm -from pathlib import Path -from typing import List - -async def fetch_file_async(session, url: str, output_path: Path): - async with session.get(url) as response: - response.raise_for_status() - with open(output_path, 'wb') as f: - async for chunk in response.content.iter_chunked(8192): - f.write(chunk) - -async def download_files(urls: List[str], output_paths: List[Path]): - async with aiohttp.ClientSession() as session: - tasks = [] - for url, output_path in zip(urls, output_paths): - tasks.append(fetch_file_async(session, url, output_path)) - - for f in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Downloading files"): - await f diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index f3036e788..94cea1004 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -2,15 +2,13 @@ import numpy as np import torch import json -import gc + from typing import Optional, Tuple from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel from exo.api.chatgpt_api import resolve_tokenizer from exo.helpers import DEBUG -from transformers import DynamicCache, Cache -from accelerate import disk_offload from exo.download.shard_download import ShardDownloader # model value options diff --git a/exo/inference/pytorch/model/archive/hf_manual.py b/exo/inference/pytorch/model/archive/hf_manual.py deleted file mode 100644 index e5af2eaf8..000000000 --- a/exo/inference/pytorch/model/archive/hf_manual.py +++ /dev/null @@ -1,203 +0,0 @@ -# Attempted version to recreate manually using LlamaModel and others -# BROKEN -import torch -import numpy as np -from transformers import AutoModelForCausalLM, DynamicCache, Cache, AutoModel -from exo.inference.shard import Shard -from exo.helpers import DEBUG -from typing import Tuple, Optional, Union, List -from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask -from exo.inference.pytorch.model.archive.utils 
import sample_logits - -TOP_P = 0.7 #0.95 -TOP_K = 50 -TEMP = 0.01 - - -class ShardedHuggingFaceModel(torch.nn.Module): - def __init__(self, shard: Shard): - super(ShardedHuggingFaceModel, self).__init__() - - if torch.cuda.is_available(): - self.device = torch.device("cuda") - else: - self.device = torch.device("cpu") - - self.shard = shard - - # Load the model - try: - self.base_model = AutoModel.from_pretrained( - shard.model_id, - torch_dtype=torch.float32, - device_map="auto", - # offload_buffers=True - ) - - # disk_offload(model=self.base_model, offload_dir="./.offload") - except Exception as err: - print(f"Error loading model: {err}") - raise - - if DEBUG >= 2: - print(f"\nShardedHuggingFaceModel init with shard {shard}") - print(f"self.base_model: {self.base_model}") - - # Embeddings and final layer norm - # used for doing what forward LlamaModel does in transformers - self.norm = self.base_model.norm - self.lm_head = torch.nn.Linear( - self.base_model.config.hidden_size, - self.base_model.config.vocab_size, - bias=False - ).to(self.device) - self.embed_tokens = self.base_model.embed_tokens - - def forward( - self, - input_ids: torch.tensor, - attention_mask: torch.tensor = None, - past_kvs: Optional[Union[Cache, List[torch.FloatTensor]]] = None, - ) -> Tuple[np.ndarray, any]: - """ - Forward through layers using the base model - - Args: - input_ids: tensor input - attention_mask: attention mask from tokenizer - past_kvs: past key value stores for cache - - Returns: - hidden_states: numpy of states between layers - or logits: numpy of normalization and linearization of last hidden state - past_kvs: DynamicCache of past key values if use_cache is true - - Ref: - https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/qwen2/modeling_qwen2.py#L804 - https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/llama/modeling_llama.py#L887 - """ - if DEBUG >= 4: - print("forward called") - print(f"input_ids: {input_ids}\n") - print(f"layer_count: {self.shard.get_layer_count()}") - print(f"is_first_layer: {self.shard.is_first_layer()}") - print(f"is_last_layer: {self.shard.is_last_layer()}") - - if self.shard.is_first_layer(): - if DEBUG >= 2: - print("first layer, embed") - print(f"input_ids: {input_ids}") - input_ids = self.embed_tokens(input_ids) - - if DEBUG >= 2: - print(f"embeded input_ids: {input_ids}") - - if attention_mask == None: - # get attention mask - past_kv_length = len(past_kvs) - batch_size, seq_length = input_ids.shape[:2] - attention_mask = _prepare_4d_causal_attention_mask( - None, (batch_size, seq_length), input_ids, past_kv_length - ) - - past_kvs = DynamicCache.from_legacy_cache(past_kvs) - past_seen_tokens = past_kvs.get_seq_length() if past_kvs is not None else 0 - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + input_ids.shape[1], - device=self.device - ) - - position_ids = cache_position.unsqueeze(0).to(self.device) - - try: - position_embeddings = self.base_model.rotary_emb( - input_ids, - position_ids - ) - except Exception as err: - print(f"rotary_emb not found in base_model") - position_embeddings = None - - causal_mask = self.base_model._update_causal_mask( - attention_mask, - input_ids, - cache_position, - past_kvs, - self.base_model.config.output_attentions - ) - - # progress through layers - for i in range(self.shard.start_layer, self.shard.end_layer + 1): - decoder_layer = self.base_model.layers[i] - - if DEBUG >= 4: - print("Going through layer") - print(f"{decoder_layer}") - 
print("input_ids") - print(f"{input_ids}") - print("causal_mask") - print(f"{causal_mask}") - - try: - layer_outputs = decoder_layer( - input_ids, - attention_mask=causal_mask, - position_ids=position_ids, - position_embeddings=position_embeddings, - past_key_value=past_kvs, - use_cache=True, - cache_position=cache_position, - output_logits=True - ) - except Exception as err: - print(f"Going through layer failed: {err}") - print(err.__traceback__.tb_lineno) - raise - - hidden_states = layer_outputs[0] - next_kvs = layer_outputs[1] - - if DEBUG >= 3: - print(f"layer_outputs {layer_outputs}") - print(layer_outputs[1:]) - - if self.shard.is_last_layer(): - hs_norm = self.norm(hidden_states).to(self.device) - # hs_lm_head = self.base_model.lm_head(hs_norm).float() - - # Use the sampling function with default settings - with torch.no_grad(): - logits = self.lm_head( - hs_norm[:, -1:, :] - ).to(self.device).float() - - if DEBUG >= 2: - print(f"hs_norm: {hs_norm}") - # print(f"hs_lm_head: {hs_lm_head}") - print(f"logits: {logits}") - print(f"logits.shape: {logits.shape}") - - # output_token = sample_logits( - # logits, - # TEMP, - # TOP_P, - # TOP_K - # ).unsqueeze(0).unsqueeze(0).long() - - output_token = torch.distributions.Categorical( - logits=logits - ).sample(sample_shape=(1,)) - - if DEBUG >= 2: - print(f"output_token: {output_token}") - - return (output_token.numpy(force=True), next_kvs) - - with torch.no_grad(): - out_hidden_states = hidden_states.float().numpy(force=True) - - return ( - out_hidden_states, - next_kvs - ) \ No newline at end of file diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 1481c40cc..1b617d7cd 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -5,38 +5,20 @@ from exo.inference.shard import Shard from exo.helpers import DEBUG -from exo.inference.inference_engine import InferenceEngine -from exo.download.shard_download import ShardDownloader from transformers import ( - AutoModel, AutoModelForCausalLM, - AutoTokenizer, DynamicCache, Cache, LogitsProcessorList, - #MinLengthLogitsProcessor, - LogitsWarper, TopKLogitsWarper, TopPLogitsWarper, - TemperatureLogitsWarper, - StoppingCriteriaList, - MaxLengthCriteria, - MaxTimeCriteria -) - -from transformers.generation.configuration_utils import ( - GenerationConfig, - GenerationMode + TemperatureLogitsWarper ) # llama from transformers.models.llama.modeling_llama import LlamaModel -# qwen2 -from transformers.models.qwen2.modeling_qwen2 import Qwen2Model - - class ShardedHuggingFaceModel: def __init__( self, @@ -68,14 +50,6 @@ def __init__( TopPLogitsWarper(top_p) ]) - # setup stopping critera for generation - self.stopping_critera = StoppingCriteriaList( - [ - #MaxLengthCriteria(max_length=max_length), - MaxTimeCriteria(max_time=max_time), - ] - ) - self.device = device self.torch_dtype = dtype From 57e14e8cbf6e3d2dbae632a70b8b53c4ec14efcb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 11:08:19 -0800 Subject: [PATCH 371/589] adding needed libs to setup.py, fixing 4 space to 2 space issue, adding in hf downloader to inference engine, testing --- .gitignore | 3 + exo/inference/pytorch/inference.py | 533 +++++++++--------- exo/inference/pytorch/model/hf.py | 30 +- .../pytorch/tests/test_inference_engine.py | 283 ++++------ .../pytorch/tests/test_split_model.py | 25 +- exo/models.py | 12 +- exo/tinychat/index.html | 1 + setup.py | 2 + 8 files changed, 438 insertions(+), 451 deletions(-) diff --git a/.gitignore b/.gitignore index 
f5609f311..33907f700 100644 --- a/.gitignore +++ b/.gitignore @@ -173,3 +173,6 @@ cython_debug/ # PyTorch interface .offload + +# neovim/vim settings +.vimrc diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 94cea1004..2f87c1b10 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,4 +1,5 @@ # experimental, based off of tinygrad/inference.py +import os import numpy as np import torch import json @@ -9,9 +10,9 @@ from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel from exo.api.chatgpt_api import resolve_tokenizer from exo.helpers import DEBUG -from exo.download.shard_download import ShardDownloader +from exo.download.hf.hf_shard_download import HFShardDownloader -# model value options +# model value options TOP_K = 20 TEMP = 0.6 TOP_P = 0.9 @@ -19,267 +20,273 @@ MAX_TIME = 60.0 class PyTorchDynamicShardInferenceEngine(InferenceEngine): + """ + PyTorch Dynamic Shard Inference Engine for performing model inference with sharded Pytorch/HF based models. + """ + + def __init__(self, shard_downloader: HFShardDownloader): + """ + Initialize the inference engine. + + Args: + debug (bool): If True, enables debug logging. Defaults to False. """ - PyTorch Dynamic Shard Inference Engine for performing model inference with sharded Pytorch/HF based models. + self.shard = None + self.shard_downloader = shard_downloader + self.stateful_sharded_model = None + self.tokenizer = None + + # the whole history with new logits need to + # be passed to the model to reach the end token + # even with caching + self.past_input_ids = None + + # setup cuda device + if os.environ.get("PYTORCH_DEVICE"): + pytorch_device = os.environ["PYTOCH_DEVICE"] + if pytorch_device not in ["cuda", "mps", "cpu"]: + pytorch_device = "cpu" + + self.device = pytorch_device + self.torch_dtype = torch.float32 if pytorch_device != "cpu" else torch.float16 + + if torch.cuda.is_available(): + self.device = torch.device("cuda") + self.torch_dtype = torch.float32 + elif torch.backends.mps.is_available(): + self.device = torch.device("mps") + self.torch_dtype = torch.float32 + else: + self.device = torch.device("cpu") + self.torch_dtype = torch.float16 + + # setup unfinished sequence + self.unfinished_sequences = torch.ones(1, dtype=torch.long, device=self.device) + + def infer_caching( + self, + inference_state: Optional[str] = None + ) -> Tuple[Optional[torch.tensor], Optional[dict]]: """ + inference caching from inference_state json + """ + # setup cache and cached input_ids + past_iids = None + cached_iids = None + if inference_state is not None: + try: + infer_state = json.loads(inference_state) + except ValueError: + infer_state = None + + if infer_state is not None: + cached_iids = infer_state["cached_iids"] + if cached_iids is not None: + past_iids = None + if len(cached_iids) > 0: + past_iids = torch.tensor(cached_iids["input_ids"]).to(self.device) + cached_iids = {"input_ids": past_iids.tolist()} + + if DEBUG >= 4: + print(f"cached_iids: {cached_iids}") + + return (past_iids, cached_iids) + + async def infer_prompt( + self, + request_id: Optional[str] = None, + shard: Optional[Shard] = None, + prompt: Optional[str] = "", + image_str: Optional[str] = None, + inference_state: Optional[str] = None + ) -> Tuple[np.ndarray, str, bool]: + if DEBUG >= 4: + print("infer_prompt called") + print(f"prompt: {prompt}") + print(f"shard: {shard}") + print(f"inference_state: {inference_state}") + + await self.ensure_shard(shard) + + # setup prompt input + 
messages = [{"role": "user", "content": prompt}] + txt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + + inputs = self.tokenizer([txt], return_tensors="pt") + input_ids = inputs.input_ids.to(self.device) + input_attention_mask = inputs.attention_mask.to(self.device) + batch_size, seq_length = input_ids.shape[:2] + + # get cache from inference_state + past_iids, cached_iids = self.infer_caching(inference_state) + + if past_iids is not None: + self.past_input_ids = past_iids, + else: + self.past_input_ids = input_ids + + if DEBUG >= 4: + print(f"past_input_ids: {self.past_input_ids}\n") + + shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( + input_ids=self.past_input_ids, + attention_mask=input_attention_mask + ) + + if DEBUG >= 4: + print(f"\nshard_hidden_states: {shard_hidden_states}\n") + print(f"\nshard_past_kvs {shard_past_kvs}\n") + print(f"\nshard_logits: {shard_logits}") + + next_token = None + if shard_logits is not None: + next_token = self.stateful_sharded_model.logits_sample(shard_logits) + self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) + input_ids = next_token + + if self.past_input_ids is not None: + cached_iids = {"input_ids": self.past_input_ids.tolist()} + + is_finished = False + if next_token is not None: + is_finished = next_token.item() == self.tokenizer.eos_token_id + + if DEBUG >= 4: + print(f"\ninput_ids: {input_ids}") + print(f"\nshard_hidden_states: {shard_hidden_states}\n") + print(f"\nshard_past_kvs {shard_past_kvs}\n") + print(f"\nshard_logits: {shard_logits}") + + return_values = ( + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), + json.dumps({"cached_iids": cached_iids}), + is_finished + ) + + if DEBUG >= 4: + print(f"return_values: {return_values}") + + return return_values + + async def infer_tensor( + self, + request_id: str, + shard: Shard, + input_data: np.ndarray, + inference_state: Optional[str] = None + ) -> Tuple[np.ndarray, str, bool]: + if DEBUG >= 4: + print("infer_tensor called") + print(f"input_data: {input_data}") + print(f"shard: {shard}") + print(f"inference_state: {inference_state}") + + await self.ensure_shard(shard) + + input_ids = torch.tensor(input_data).to(self.device) + + # get cache from inference_state + past_iids, cached_iids = self.infer_caching(inference_state) + + # detect if hidden_states or not + hidden_states = None + self.past_input_ids = None + if input_ids.size()[-1] > 1: + hidden_states = input_ids + else: + if past_iids is not None: + self.past_input_ids = past_iids + else: + self.past_input_ids = input_ids + + if DEBUG >= 4: + print(f"input_ids: {input_ids}") + print(f"inference_state: {inference_state}") + + shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( + input_ids=self.past_input_ids, + hidden_states=hidden_states + ) + + hidden_dict = None + if shard_hidden_states is not None: + hidden_dict = {"hidden_states": shard_hidden_states.tolist()} + + next_token = None + if shard_logits is not None: + next_token = self.stateful_sharded_model.logits_sample(shard_logits) + input_ids = next_token + + #cache + if next_token is not None: + if self.past_input_ids is not None: + next_cached_logits = torch.cat([self.past_input_ids, next_token], dim=-1).to(self.device) + elif past_iids is not None: + next_cached_logits = torch.cat([past_iids, next_token], dim=-1).to(self.device) + + cached_iids = {"input_ids": 
next_cached_logits.tolist()} + + is_finished = False + if next_token is not None: + is_finished = next_token.item() == self.tokenizer.eos_token_id + + if is_finished: + # clear cache + cached_iids = {"input_ids": []} + + if DEBUG >= 4: + print(f"\ninput_ids: {input_ids}") + print(f"\nshard_hidden_states: {shard_hidden_states}\n") + print(f"\nshard_past_kvs {shard_past_kvs}\n") + print(f"\nshard_logits: {shard_logits}") + + return_values = ( + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), + json.dumps({"cached_iids": cached_iids}), + is_finished + ) + + if DEBUG >= 4: + print(f"return_values: {return_values}") + + return return_values + + + async def ensure_shard(self, shard: Optional[Shard]): + """ + Ensure the model shard is loaded and ready for inference. - def __init__(self, shard_downloader: ShardDownloader): - """ - Initialize the inference engine. - - Args: - debug (bool): If True, enables debug logging. Defaults to False. - """ - self.shard = None - self.shard_downloader = shard_downloader - self.stateful_sharded_model = None - self.tokenizer = None - - # the whole history with new logits need to - # be passed to the model to reach the end token - # even with caching - self.past_input_ids = None - - # setup cuda device - if torch.cuda.is_available(): - self.device = torch.device("cuda") - self.torch_dtype = torch.float32 - elif torch.backends.mps.is_available(): - self.device = torch.device("mps") - self.torch_dtype = torch.float32 - else: - self.device = torch.device("cpu") - self.torch_dtype = torch.float16 - - # setup unfinished sequence - self.unfinished_sequences = torch.ones(1, dtype=torch.long, device=self.device) - - def infer_caching( - self, - inference_state: Optional[str] = None - ) -> Tuple[Optional[torch.tensor], Optional[dict]]: - """ - inference caching from inference_state json - """ - # setup cache and cached input_ids - past_iids = None - cached_iids = None - if inference_state is not None: - try: - infer_state = json.loads(inference_state) - except ValueError: - infer_state = None - - if infer_state is not None: - cached_iids = infer_state["cached_iids"] - if cached_iids is not None: - past_iids = None - if len(cached_iids) > 0: - past_iids = torch.tensor(cached_iids["input_ids"]).to(self.device) - cached_iids = {"input_ids": past_iids.tolist()} - - if DEBUG >= 4: - print(f"cached_iids: {cached_iids}") - - return (past_iids, cached_iids) - - - async def infer_prompt( - self, - request_id: str, - shard: Optional[Shard] = None, - prompt: str = "", - image_str: Optional[str] = None, - inference_state: Optional[str] = None - ) -> Tuple[np.ndarray, str, bool]: - if DEBUG >= 4: - print("infer_prompt called") - print(f"prompt: {prompt}") - print(f"shard: {shard}") - print(f"inference_state: {inference_state}") - - await self.ensure_shard(shard) - - # setup prompt input - messages = [{"role": "user", "content": prompt}] - txt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) - - inputs = self.tokenizer([txt], return_tensors="pt") - input_ids = inputs.input_ids.to(self.device) - input_attention_mask = inputs.attention_mask.to(self.device) - batch_size, seq_length = input_ids.shape[:2] - - # get cache from inference_state - past_iids, cached_iids = self.infer_caching(inference_state) - - if past_iids is not None: - self.past_input_ids = past_iids, - else: - self.past_input_ids = input_ids - - if DEBUG >= 4: - print(f"past_input_ids: {self.past_input_ids}\n") - - 
shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( - input_ids=self.past_input_ids, - attention_mask=input_attention_mask - ) - - if DEBUG >= 4: - print(f"\nshard_hidden_states: {shard_hidden_states}\n") - print(f"\nshard_past_kvs {shard_past_kvs}\n") - print(f"\nshard_logits: {shard_logits}") - - next_token = None - if shard_logits is not None: - next_token = self.stateful_sharded_model.logits_sample(shard_logits) - self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) - input_ids = next_token - - if self.past_input_ids is not None: - cached_iids = {"input_ids": self.past_input_ids.tolist()} - - is_finished = False - if next_token is not None: - is_finished = next_token.item() == self.tokenizer.eos_token_id - - if DEBUG >= 4: - print(f"\ninput_ids: {input_ids}") - print(f"\nshard_hidden_states: {shard_hidden_states}\n") - print(f"\nshard_past_kvs {shard_past_kvs}\n") - print(f"\nshard_logits: {shard_logits}") - - return_values = ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps({"cached_iids": cached_iids}), - is_finished - ) - - if DEBUG >= 4: - print(f"return_values: {return_values}") - - return return_values - - async def infer_tensor( - self, - request_id: str, - shard: Shard, - input_data: np.ndarray, - inference_state: Optional[str] = None - ) -> Tuple[np.ndarray, str, bool]: - if DEBUG >= 4: - print("infer_tensor called") - print(f"input_data: {input_data}") - print(f"shard: {shard}") - print(f"inference_state: {inference_state}") - - await self.ensure_shard(shard) - - input_ids = torch.tensor(input_data).to(self.device) - - # get cache from inference_state - past_iids, cached_iids = self.infer_caching(inference_state) - - # detect if hidden_states or not - hidden_states = None - self.past_input_ids = None - if input_ids.size()[-1] > 1: - hidden_states = input_ids - else: - if past_iids is not None: - self.past_input_ids = past_iids - else: - self.past_input_ids = input_ids - - if DEBUG >= 4: - print(f"input_ids: {input_ids}") - print(f"inference_state: {inference_state}") - - shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( - input_ids=self.past_input_ids, - hidden_states=hidden_states - ) - - hidden_dict = None - if shard_hidden_states is not None: - hidden_dict = {"hidden_states": shard_hidden_states.tolist()} - - next_token = None - if shard_logits is not None: - next_token = self.stateful_sharded_model.logits_sample(shard_logits) - input_ids = next_token - - #cache - if next_token is not None: - if self.past_input_ids is not None: - next_cached_logits = torch.cat([self.past_input_ids, next_token], dim=-1).to(self.device) - elif past_iids is not None: - next_cached_logits = torch.cat([past_iids, next_token], dim=-1).to(self.device) - - cached_iids = {"input_ids": next_cached_logits.tolist()} - - is_finished = False - if next_token is not None: - is_finished = next_token.item() == self.tokenizer.eos_token_id - - if is_finished: - # clear cache - cached_iids = {"input_ids": []} - - if DEBUG >= 4: - print(f"\ninput_ids: {input_ids}") - print(f"\nshard_hidden_states: {shard_hidden_states}\n") - print(f"\nshard_past_kvs {shard_past_kvs}\n") - print(f"\nshard_logits: {shard_logits}") - - return_values = ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps({"cached_iids": cached_iids}), - is_finished - ) - - if DEBUG >= 4: - print(f"return_values: 
{return_values}") - - return return_values - - - async def ensure_shard(self, shard: Optional[Shard]): - """ - Ensure the model shard is loaded and ready for inference. - - Args: - shard (Optional[Shard]): Shard information for the model. - """ - if self.shard == shard: - return - - if DEBUG >= 4: - print(f"Loading new shard: {shard}") - - # -- TO DO -- - # Build in shard downloader but requires pulling - # apart how TrainedModel loads weight in its __init__ - # function in the transformer library - # model_path = await self.shard_downloader.ensure_shard(shard) - - self.tokenizer = await resolve_tokenizer(shard.model_id) - self.stateful_sharded_model = ShardedHuggingFaceModel( - shard=shard, - device=self.device, - dtype=self.torch_dtype, - top_k=TOP_K, - temp=TEMP, - top_p=TOP_P, - max_length=MAX_LENGTH, - max_time=MAX_TIME - ) - - self.shard = shard - - if DEBUG >= 4: - print(f"Shard loaded successfully: {shard}") + Args: + shard (Optional[Shard]): Shard information for the model. + """ + if self.shard == shard: + return + + if DEBUG >= 4: + print(f"Loading new shard: {shard}") + + model_path = await self.shard_downloader.ensure_shard(shard) + if DEBUG >= 4: + print(f"model_path: {model_path}") + + self.tokenizer = await resolve_tokenizer(shard.model_id) + self.stateful_sharded_model = ShardedHuggingFaceModel( + shard=shard, + local_model_path=model_path, + device=self.device, + dtype=self.torch_dtype, + top_k=TOP_K, + temp=TEMP, + top_p=TOP_P, + max_length=MAX_LENGTH, + max_time=MAX_TIME + ) + + self.shard = shard + + if DEBUG >= 4: + print(f"Shard loaded successfully: {shard}") diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 1b617d7cd..38cd85c20 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -23,7 +23,8 @@ class ShardedHuggingFaceModel: def __init__( self, shard: Shard, - device, + local_model_path, + device, dtype, top_k: int = 25, temp: float = 0.7, @@ -31,19 +32,20 @@ def __init__( max_length: int = 50, max_time: float = 10.0 ): - # class vars + # class vars self.shard = shard - self.hidden_states = None + self.hidden_states = None self.input_ids = None self.inputs_embeds = None self.attention_mask = None - self.position_embeddings = None - self.past_key_values = None - self.cache_position = None - self.position_ids = None + self.position_embeddings = None + self.past_key_values = None + self.cache_position = None + self.position_ids = None self.causal_mask = None + self.local_model_path = local_model_path - # setup logit processors + # setup logit processors self.logits_processor = LogitsProcessorList([ TopKLogitsWarper(top_k), TemperatureLogitsWarper(temp), @@ -56,13 +58,13 @@ def __init__( # setup pytorch and transformer llm try: self.llm_model = AutoModelForCausalLM.from_pretrained( - shard.model_id, + pretrained_model_name_or_path=self.local_model_path, torch_dtype=self.torch_dtype, device_map="auto", offload_buffers=True ) - self.model = self.llm_model.model + self.model = self.llm_model.model except Exception as err: print(f"error loading and splitting model: {err}") raise @@ -70,7 +72,6 @@ def __init__( def forward( self, - shard: Optional[Shard] = None, input_ids: Optional[torch.tensor] = None, hidden_states: Optional[torch.tensor] = None, attention_mask: Optional[torch.tensor] = None, @@ -93,7 +94,7 @@ def forward( infer_tensor: bool optional, lets forward know to handle tensors Returns: - Tuple of + Tuple of - hidden_states: tensor optional - past_key_values: Cache or list[tensor] optional 
- logits: tensor Optional @@ -199,9 +200,8 @@ def forward( print(f"hidden_states: {self.hidden_states}") print(f"next_decoder_cache: {self.next_decoder_cache}") - # handle last layer to get logits - # shard is last layer says true at the start and not detecting last layer correctly + # shard is last layer says true at the start and not detecting last layer correctly if self.shard.is_last_layer(): self.hidden_states = self.model.norm(self.hidden_states) if use_legacy_cache: @@ -209,7 +209,7 @@ def forward( else: self.past_key_values = self.next_decoder_cache - # lm_head + # lm_head logits = self.llm_model.lm_head(self.hidden_states).to(self.device) if DEBUG >= 4: diff --git a/exo/inference/pytorch/tests/test_inference_engine.py b/exo/inference/pytorch/tests/test_inference_engine.py index 7e64c137a..854d9b9c9 100644 --- a/exo/inference/pytorch/tests/test_inference_engine.py +++ b/exo/inference/pytorch/tests/test_inference_engine.py @@ -11,164 +11,131 @@ import time async def test_inference_engine( - inference_engine_1: InferenceEngine, - inference_engine_2: InferenceEngine, - model_id: str, - n_layers: int): - - # prompt = "Why is the sky blue?" - prompt = "In a single word only, what is the last name of the current president of the USA?" - - shard = Shard( - model_id=model_id, - start_layer=0, - end_layer=n_layers-1, - n_layers=n_layers - ) - - resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( - "A", - shard=shard, - prompt=prompt - ) - - print("\n------------resp_full---------------\n") - print(resp_full) - print("\n------------resp_full---------------\n") - - time.sleep(5) - - next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( - "A", - shard=shard, - input_data=resp_full, - inference_state=inference_state_full, - ) - - print("\n------------next_resp_full---------------\n") - print(next_resp_full) - print("\n------------next_resp_full---------------\n") - - time.sleep(5) - - pp = int(n_layers/2) - - resp_shard = Shard( - model_id=model_id, - start_layer=0, - end_layer=pp, - n_layers=n_layers - ) - - resp_shard2 = Shard( - model_id=model_id, - start_layer=pp + 1, - end_layer=n_layers-1, - n_layers=n_layers - ) - - resp1, inference_state_1, _ = await inference_engine_1.infer_prompt( - "B", - shard=resp_shard, - prompt=prompt - ) - - print("\n------------resp1---------------\n") - print(resp1) - print("\n------------resp1---------------\n") - - time.sleep(5) - - - resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( - "B", - shard=resp_shard2, - input_data=resp1, - inference_state=inference_state_1, - ) - - print("\n------------resp2---------------\n") - print(resp2) - print("\n------------resp2---------------\n") - - resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( - "B", - shard=resp_shard, - input_data=resp2, - inference_state=inference_state_2, - ) - - print("\n------------resp3---------------\n") - print(resp3) - print("\n------------resp3---------------\n") - - resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( - "B", - shard=resp_shard2, - input_data=resp3, - inference_state=inference_state_3, - ) - - print("\n------------resp4---------------\n") - print(resp4) - print("\n------------resp4---------------\n") - - assert np.array_equal(resp_full, resp2) - assert np.array_equal(next_resp_full, resp4) + inference_engine_1: InferenceEngine, + inference_engine_2: InferenceEngine, + model_id: str, + n_layers: int): + + # prompt = "Why is the sky blue?" 
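[Editor's aside — illustrative sketch only, not part of patch 371.] The test below compares a full 0..n_layers-1 run against a two-node run split at pp = n_layers/2. A small sketch of how those two shard ranges are derived, assuming the inclusive start_layer/end_layer convention used by Shard in this test; split_in_two is a hypothetical helper name:

def split_in_two(n_layers: int):
  # first node handles layers 0..pp, second node handles pp+1..n_layers-1
  pp = n_layers // 2
  first = (0, pp)
  second = (pp + 1, n_layers - 1)
  # the two inclusive ranges together cover every layer exactly once
  assert (first[1] - first[0] + 1) + (second[1] - second[0] + 1) == n_layers
  return first, second

# e.g. split_in_two(24) -> ((0, 12), (13, 23))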
+ prompt = "In a single word only, what is the last name of the current president of the USA?" + + shard = Shard( + model_id=model_id, + start_layer=0, + end_layer=n_layers-1, + n_layers=n_layers + ) + + resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( + "A", + shard=shard, + prompt=prompt + ) + + print("\n------------resp_full---------------\n") + print(resp_full) + print("\n------------resp_full---------------\n") + + time.sleep(5) + + next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( + "A", + shard=shard, + input_data=resp_full, + inference_state=inference_state_full, + ) + + print("\n------------next_resp_full---------------\n") + print(next_resp_full) + print("\n------------next_resp_full---------------\n") + + time.sleep(5) + + pp = int(n_layers/2) + + resp_shard = Shard( + model_id=model_id, + start_layer=0, + end_layer=pp, + n_layers=n_layers + ) + + resp_shard2 = Shard( + model_id=model_id, + start_layer=pp + 1, + end_layer=n_layers-1, + n_layers=n_layers + ) + + resp1, inference_state_1, _ = await inference_engine_1.infer_prompt( + "B", + shard=resp_shard, + prompt=prompt + ) + + print("\n------------resp1---------------\n") + print(resp1) + print("\n------------resp1---------------\n") + + time.sleep(5) + + + resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( + "B", + shard=resp_shard2, + input_data=resp1, + inference_state=inference_state_1, + ) + + print("\n------------resp2---------------\n") + print(resp2) + print("\n------------resp2---------------\n") + + resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( + "B", + shard=resp_shard, + input_data=resp2, + inference_state=inference_state_2, + ) + + print("\n------------resp3---------------\n") + print(resp3) + print("\n------------resp3---------------\n") + + resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( + "B", + shard=resp_shard2, + input_data=resp3, + inference_state=inference_state_3, + ) + + print("\n------------resp4---------------\n") + print(resp4) + print("\n------------resp4---------------\n") + + assert np.array_equal(resp_full, resp2) + assert np.array_equal(next_resp_full, resp4) if __name__ == '__main__': - try: - print(f"\n\n -------- TEST QWEN2 -------- \n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "Qwen/Qwen2-0.5B-Instruct", - 24 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") - - # try: - # print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "andrijdavid/Llama3-1B-Base", - # 3 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") - - # try: - # print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "meta-llama/Meta-Llama-3.1-8B", - # 32 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! 
META LLAMA 3.1 8B TEST FAILED \n{err}\n") - - # try: - # print(f"\n\n ------- TEST Chickaboo/ChickaQ-Large -----\n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "Chickaboo/ChickaQ-Large", - # 24 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! Chickaboo/ChickaQ-Large TEST FAILED \n{err}\n") - - #try: - # print(f"\n\n --------- TEST TinyLlama/TinyLlama_v1.1 -------\n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "TinyLlama/TinyLlama_v1.1", - # 22 - # )) - #except Exception as err: - # print(f"\n\n !!!!!!!!!!! TinyLlama/TinyLlama_v1.1 TEST FAILED \n{err}\n") + # try: + # print("\n\n -------- TEST QWEN2 -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "Qwen/Qwen2-0.5B-Instruct", + # 24 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") + + try: + print("\n-------- Test meta-llama/Llama-3.2-1B-Instruct ----------\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "meta-llama/Llama-3.2-1B-Instruct", + 24 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") diff --git a/exo/inference/pytorch/tests/test_split_model.py b/exo/inference/pytorch/tests/test_split_model.py index 827bdec2e..157a215d1 100644 --- a/exo/inference/pytorch/tests/test_split_model.py +++ b/exo/inference/pytorch/tests/test_split_model.py @@ -3,14 +3,11 @@ import asyncio import gc from transformers import ( - AutoModel, AutoModelForCausalLM, AutoTokenizer, DynamicCache, Cache, LogitsProcessorList, - #MinLengthLogitsProcessor, - LogitsWarper, TopKLogitsWarper, TopPLogitsWarper, TemperatureLogitsWarper, @@ -286,8 +283,8 @@ async def model_half_split_test(prompt: str, model_id: str, layers: int): stopping_critera = StoppingCriteriaList( [ - MaxLengthCriteria(max_length=50), - MaxTimeCriteria(max_time=10.0), + MaxLengthCriteria(max_length=255), + MaxTimeCriteria(max_time=100.0), ] ) @@ -355,9 +352,21 @@ async def model_half_split_test(prompt: str, model_id: str, layers: int): # ) #) - print("\n-------- Test Qwen/Qwen2-0.5B-Instruct ----------\n") - model_id = "Qwen/Qwen2-0.5B-Instruct" - model_layers = 24 + #print("\n-------- Test Qwen/Qwen2-0.5B-Instruct ----------\n") + #model_id = "Qwen/Qwen2-0.5B-Instruct" + #model_layers = 24 + + #asyncio.run( + # model_half_split_test( + # prompt=prompt, + # model_id=model_id, + # layers=model_layers + # ) + #) + + print("\n-------- Test meta-llama/Llama-3.2-1B-Instruct ----------\n") + model_id = "meta-llama/Llama-3.2-1B-Instruct" + model_layers = 32 asyncio.run( model_half_split_test( diff --git a/exo/models.py b/exo/models.py index 67ea81c41..6f69960ea 100644 --- a/exo/models.py +++ b/exo/models.py @@ -36,8 +36,8 @@ "llama-3-1B-Base": { "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-1B-Base", start_layer=0, end_layer=0, n_layers=3), }, - "TinyLlama-1.1B-Chat-yaw": { - "PyTorchDynamicShardInferenceEngine": Shard(model_id="ambrosfitz/TinyLlama-1.1B-Chat-yawp", start_layer=0, end_layer=0, n_layers=22), + "meta-llama/Llama-3.2-1B-Instruct": { + 
"PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=24), }, ### mistral "mistral-nemo": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40),}, @@ -47,11 +47,6 @@ "deepseek-coder-v2.5": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/DeepSeek-V2.5-MLX-AQ4_1_64", start_layer=0, end_layer=0, n_layers=60),}, ### llava "llava-1.5-7b-hf": {"MLXDynamicShardInferenceEngine": Shard(model_id="llava-hf/llava-1.5-7b-hf", start_layer=0, end_layer=0, n_layers=32),}, - ### qwen - "Qwen2-0.5B-Instruct": { - "PyTorchDynamicShardInferenceEngine": Shard(model_id="Qwen/Qwen2-0.5B-Instruct", start_layer=0, end_layer=0, n_layers=24), - }, - ### qwen "qwen-2.5-coder-1.5b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), @@ -74,4 +69,7 @@ "qwen-2.5-math-72b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Math-72B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), }, + "Qwen2-0.5B-Instruct": { + "PyTorchDynamicShardInferenceEngine": Shard(model_id="Qwen/Qwen2-0.5B-Instruct", start_layer=0, end_layer=0, n_layers=24), + }, } diff --git a/exo/tinychat/index.html b/exo/tinychat/index.html index 9cad69d58..c00d2b0a6 100644 --- a/exo/tinychat/index.html +++ b/exo/tinychat/index.html @@ -38,6 +38,7 @@ + diff --git a/setup.py b/setup.py index 75d570e9f..8401167be 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,8 @@ "transformers==4.43.3", "uuid==1.30", "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@232edcfd4f8b388807c64fb1817a7668ce27cbad", + "torch==2.4.0+cu124", + "accelerate=0.33.0" ] # Add macOS-specific packages if on Darwin (macOS) From 9fe3ec63dd26b78d9c27e3bcb17f72a79c7ee977 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 11:51:33 -0800 Subject: [PATCH 372/589] cleaning up code, added pytorch engine to llama 3.2 1b model shard in models.py, removed old 3.2 1b model shard, moving to test server for more vram --- exo/inference/pytorch/inference.py | 21 ++++++++++++--------- exo/inference/pytorch/model/hf.py | 2 +- exo/models.py | 4 +--- exo/tinychat/index.html | 1 - setup.py | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 2f87c1b10..8264aae83 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -12,6 +12,9 @@ from exo.helpers import DEBUG from exo.download.hf.hf_shard_download import HFShardDownloader +# llama +from transformers.models.llama.modeling_llama import LlamaModel + # model value options TOP_K = 20 TEMP = 0.6 @@ -52,7 +55,7 @@ def __init__(self, shard_downloader: HFShardDownloader): if torch.cuda.is_available(): self.device = torch.device("cuda") - self.torch_dtype = torch.float32 + self.torch_dtype = torch.float16 elif torch.backends.mps.is_available(): self.device = torch.device("mps") self.torch_dtype = torch.float32 @@ -105,10 +108,10 @@ async def infer_prompt( print(f"prompt: {prompt}") print(f"shard: {shard}") print(f"inference_state: {inference_state}") - + await self.ensure_shard(shard) - - # setup prompt input + + # setup prompt input messages = [{"role": "user", "content": prompt}] txt = self.tokenizer.apply_chat_template( messages, @@ -174,9 +177,9 @@ async def infer_prompt( async def infer_tensor( self, - request_id: str, - 
shard: Shard, - input_data: np.ndarray, + request_id: str, + shard: Shard, + input_data: np.ndarray, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: if DEBUG >= 4: @@ -192,13 +195,13 @@ async def infer_tensor( # get cache from inference_state past_iids, cached_iids = self.infer_caching(inference_state) - # detect if hidden_states or not + # detect if hidden_states or not hidden_states = None self.past_input_ids = None if input_ids.size()[-1] > 1: hidden_states = input_ids else: - if past_iids is not None: + if past_iids is not None: self.past_input_ids = past_iids else: self.past_input_ids = input_ids diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 38cd85c20..57a1590b0 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -16,7 +16,7 @@ TemperatureLogitsWarper ) -# llama +# llama from transformers.models.llama.modeling_llama import LlamaModel class ShardedHuggingFaceModel: diff --git a/exo/models.py b/exo/models.py index 6f69960ea..2f1e7d10a 100644 --- a/exo/models.py +++ b/exo/models.py @@ -4,6 +4,7 @@ ### llama "llama-3.2-1b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-1B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=16), + "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), }, "llama-3.2-3b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-3B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), @@ -36,9 +37,6 @@ "llama-3-1B-Base": { "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-1B-Base", start_layer=0, end_layer=0, n_layers=3), }, - "meta-llama/Llama-3.2-1B-Instruct": { - "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=24), - }, ### mistral "mistral-nemo": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40),}, "mistral-large": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Large-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=88),}, diff --git a/exo/tinychat/index.html b/exo/tinychat/index.html index c00d2b0a6..9cad69d58 100644 --- a/exo/tinychat/index.html +++ b/exo/tinychat/index.html @@ -38,7 +38,6 @@ - diff --git a/setup.py b/setup.py index 8401167be..b23485a7f 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ "uuid==1.30", "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@232edcfd4f8b388807c64fb1817a7668ce27cbad", "torch==2.4.0+cu124", - "accelerate=0.33.0" + "accelerate" ] # Add macOS-specific packages if on Darwin (macOS) From b44f6e975f4b77642fba46a5fde601499c0bb52b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 13:08:14 -0800 Subject: [PATCH 373/589] updating pytorch requirement --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b23485a7f..f8ae17eed 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ "transformers==4.43.3", "uuid==1.30", "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@232edcfd4f8b388807c64fb1817a7668ce27cbad", - "torch==2.4.0+cu124", + "torch==2.4.0", "accelerate" ] From 936e60a42dfbb1b2e6a94168f3237e3dec42635a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 15:41:26 -0800 Subject: [PATCH 374/589] trying tokenizer fixes for llama3.1 --- exo/inference/pytorch/inference.py | 
17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 8264aae83..1791e0c98 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -8,10 +8,11 @@ from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel -from exo.api.chatgpt_api import resolve_tokenizer +from exo.inference.tokenizers import resolve_tokenizer from exo.helpers import DEBUG from exo.download.hf.hf_shard_download import HFShardDownloader +from transformers import AutoTokenizer # llama from transformers.models.llama.modeling_llama import LlamaModel @@ -176,7 +177,7 @@ async def infer_prompt( return return_values async def infer_tensor( - self, + self, request_id: str, shard: Shard, input_data: np.ndarray, @@ -259,7 +260,7 @@ async def infer_tensor( return return_values - async def ensure_shard(self, shard: Optional[Shard]): + async def ensure_shard(self, shard: Shard): """ Ensure the model shard is loaded and ready for inference. @@ -276,7 +277,6 @@ async def ensure_shard(self, shard: Optional[Shard]): if DEBUG >= 4: print(f"model_path: {model_path}") - self.tokenizer = await resolve_tokenizer(shard.model_id) self.stateful_sharded_model = ShardedHuggingFaceModel( shard=shard, local_model_path=model_path, @@ -288,8 +288,15 @@ async def ensure_shard(self, shard: Optional[Shard]): max_length=MAX_LENGTH, max_time=MAX_TIME ) - self.shard = shard + if isinstance(self.stateful_sharded_model.model, LlamaModel): + self.tokenizer = AutoTokenizer.from_pretrained( + model_path if model_path is not None else shard.model_id, + trust_remote_code=True + ) + else: + self.tokenizer = await resolve_tokenizer(shard.model_id) + if DEBUG >= 4: print(f"Shard loaded successfully: {shard}") From 43c3c627b506dd188ba755cba59eedd3a4b9e9da Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 15:51:16 -0800 Subject: [PATCH 375/589] detecting 3.1 for adding padding token and using autotokenizer for llama models --- exo/inference/pytorch/inference.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 1791e0c98..020bd8652 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,5 +1,6 @@ # experimental, based off of tinygrad/inference.py -import os +import os +import re import numpy as np import torch import json @@ -295,6 +296,10 @@ async def ensure_shard(self, shard: Shard): model_path if model_path is not None else shard.model_id, trust_remote_code=True ) + + if len(re.findall(r"3\.1", shard.model_id)) > 0: + self.tokenizer.add_special_tokens({"pad_token":""}) + else: self.tokenizer = await resolve_tokenizer(shard.model_id) From 75a29f464f9e8b1cd3d8c085cb7da97ab085c1ba Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 16:26:55 -0800 Subject: [PATCH 376/589] updating models.py to use instruct version --- exo/inference/pytorch/inference.py | 20 ++++++++++---------- exo/models.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 020bd8652..01841ba4c 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -13,7 +13,7 @@ from exo.helpers import DEBUG from exo.download.hf.hf_shard_download import HFShardDownloader -from 
transformers import AutoTokenizer +from tokenizers import Tokenizer # llama from transformers.models.llama.modeling_llama import LlamaModel @@ -114,14 +114,14 @@ async def infer_prompt( await self.ensure_shard(shard) # setup prompt input - messages = [{"role": "user", "content": prompt}] - txt = self.tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) - - inputs = self.tokenizer([txt], return_tensors="pt") + #messages = [{"role": "user", "content": prompt}] + #txt = self.tokenizer.apply_chat_template( + # messages, + # tokenize=False, + # add_generation_prompt=True + #) + + inputs = self.tokenizer([prompt], return_tensors="pt") input_ids = inputs.input_ids.to(self.device) input_attention_mask = inputs.attention_mask.to(self.device) batch_size, seq_length = input_ids.shape[:2] @@ -292,7 +292,7 @@ async def ensure_shard(self, shard: Shard): self.shard = shard if isinstance(self.stateful_sharded_model.model, LlamaModel): - self.tokenizer = AutoTokenizer.from_pretrained( + self.tokenizer = Tokenizer.from_pretrained( model_path if model_path is not None else shard.model_id, trust_remote_code=True ) diff --git a/exo/models.py b/exo/models.py index bb9ccf5c2..29e9a7d6e 100644 --- a/exo/models.py +++ b/exo/models.py @@ -12,7 +12,7 @@ "llama-3.1-8b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), "TinygradDynamicShardInferenceEngine": Shard(model_id="mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", start_layer=0, end_layer=0, n_layers=32), - "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, end_layer=0, n_layers=32), + "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct", start_layer=0, end_layer=0, n_layers=32), }, "llama-3.1-70b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), From e407404fa4c0af11cc5a01a5ce6fd1696e1a7a57 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 16:41:40 -0800 Subject: [PATCH 377/589] fixing autotokenizer --- exo/inference/pytorch/inference.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 01841ba4c..8bbc25798 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -13,7 +13,7 @@ from exo.helpers import DEBUG from exo.download.hf.hf_shard_download import HFShardDownloader -from tokenizers import Tokenizer +from transformers import AutoTokenizer # llama from transformers.models.llama.modeling_llama import LlamaModel @@ -71,7 +71,7 @@ def __init__(self, shard_downloader: HFShardDownloader): def infer_caching( self, inference_state: Optional[str] = None - ) -> Tuple[Optional[torch.tensor], Optional[dict]]: + ) -> Tuple[Optional[torch.Tensor], Optional[dict]]: """ inference caching from inference_state json """ @@ -99,9 +99,9 @@ def infer_caching( async def infer_prompt( self, - request_id: Optional[str] = None, - shard: Optional[Shard] = None, - prompt: Optional[str] = "", + request_id: str, + shard: Shard, + prompt: str, image_str: Optional[str] = None, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: @@ -113,14 +113,6 @@ async def infer_prompt( await self.ensure_shard(shard) - # setup prompt input - #messages = [{"role": "user", "content": prompt}] - #txt = 
self.tokenizer.apply_chat_template( - # messages, - # tokenize=False, - # add_generation_prompt=True - #) - inputs = self.tokenizer([prompt], return_tensors="pt") input_ids = inputs.input_ids.to(self.device) input_attention_mask = inputs.attention_mask.to(self.device) @@ -196,7 +188,7 @@ async def infer_tensor( # get cache from inference_state past_iids, cached_iids = self.infer_caching(inference_state) - + # detect if hidden_states or not hidden_states = None self.past_input_ids = None @@ -292,7 +284,7 @@ async def ensure_shard(self, shard: Shard): self.shard = shard if isinstance(self.stateful_sharded_model.model, LlamaModel): - self.tokenizer = Tokenizer.from_pretrained( + self.tokenizer = AutoTokenizer.from_pretrained( model_path if model_path is not None else shard.model_id, trust_remote_code=True ) From 668668f5439dc59cfa632dca0427b88e26e3a382 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 18:17:02 -0800 Subject: [PATCH 378/589] making it so position and cache is computed every forward on hf model --- exo/inference/pytorch/model/hf.py | 499 +++++++++++++++--------------- 1 file changed, 247 insertions(+), 252 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 57a1590b0..547ed040f 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -7,261 +7,256 @@ from exo.helpers import DEBUG from transformers import ( - AutoModelForCausalLM, - DynamicCache, - Cache, - LogitsProcessorList, - TopKLogitsWarper, - TopPLogitsWarper, - TemperatureLogitsWarper + AutoModelForCausalLM, + DynamicCache, + Cache, + LogitsProcessorList, + TopKLogitsWarper, + TopPLogitsWarper, + TemperatureLogitsWarper ) # llama from transformers.models.llama.modeling_llama import LlamaModel class ShardedHuggingFaceModel: - def __init__( - self, - shard: Shard, - local_model_path, - device, - dtype, - top_k: int = 25, - temp: float = 0.7, - top_p: float = 0.9, - max_length: int = 50, - max_time: float = 10.0 - ): - # class vars - self.shard = shard - self.hidden_states = None - self.input_ids = None - self.inputs_embeds = None - self.attention_mask = None - self.position_embeddings = None - self.past_key_values = None - self.cache_position = None - self.position_ids = None - self.causal_mask = None - self.local_model_path = local_model_path - - # setup logit processors - self.logits_processor = LogitsProcessorList([ - TopKLogitsWarper(top_k), - TemperatureLogitsWarper(temp), - TopPLogitsWarper(top_p) - ]) - - self.device = device - self.torch_dtype = dtype - - # setup pytorch and transformer llm - try: - self.llm_model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path=self.local_model_path, - torch_dtype=self.torch_dtype, - device_map="auto", - offload_buffers=True - ) - - self.model = self.llm_model.model - except Exception as err: - print(f"error loading and splitting model: {err}") - raise - - - def forward( - self, - input_ids: Optional[torch.tensor] = None, - hidden_states: Optional[torch.tensor] = None, - attention_mask: Optional[torch.tensor] = None, - past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, - use_legacy_cache: bool = False - ) -> Tuple[Optional[torch.tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.tensor]]: - - """ - Generate hidden states or logits via passing through set amount of layers of a model - To be passed only input_ids OR hidden_state and not both. 
This is for connecting the model - layer to generate a complete output - - Args: - model: base llm model tramsformers class - llm_model: llm chat model class - input_ids: tensor optional - attention_mask: tensor optional - past_key_values: Cache or list[tensor] optional - use_legacy_cache: bool optional - infer_tensor: bool optional, lets forward know to handle tensors - - Returns: - Tuple of - - hidden_states: tensor optional - - past_key_values: Cache or list[tensor] optional - - logits: tensor Optional - - """ - - model_inputs = None - self.hidden_states = None - - if hidden_states is not None: - self.hidden_states = hidden_states - else: - self.input_ids = input_ids - - # embed input_ids - self.inputs_embeds = self.model.embed_tokens(self.input_ids) - - # cache - if past_key_values and not isinstance(past_key_values, Cache): - use_legacy_cache = True - past_key_values = DynamicCache.from_legacy_cache(past_key_values) - - past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + self.inputs_embeds.shape[1], - device=self.inputs_embeds.device - ) - - # position id - position_ids = cache_position.unsqueeze(0) - - # casual mask and attention_mask - self.attention_mask = attention_mask - self.causal_mask = self.model._update_causal_mask( - None, - self.inputs_embeds, - cache_position, - past_key_values, - False # dont out attentions - ) - - # embed positions, some models require and some dont - if isinstance(self.model, LlamaModel): - self.position_embeddings = self.model.rotary_emb( - self.inputs_embeds, - position_ids - ) - - # prepare inputs for decoder layers - model_inputs = self.llm_model.prepare_inputs_for_generation( - self.input_ids, - past_key_values=past_key_values, - attention_mask=self.attention_mask, - inputs_embeds=self.inputs_embeds, - position_ids=position_ids, - cache_position=cache_position - ) - - self.hidden_states = self.inputs_embeds - self.position_ids = model_inputs["position_ids"] - self.cache_position = model_inputs["cache_position"] - self.past_key_values = model_inputs["past_key_values"] - - if DEBUG >= 4: - print(f"model_inputs: {model_inputs}") - - # run through decoder layers - layer_amt = range(self.shard.start_layer, self.shard.end_layer + 1) - - if DEBUG >= 4: - print(f"hidden_states: {self.hidden_states}") - print(f"layer_amt: {layer_amt}") - - for i in layer_amt: - decoder_layer = self.model.layers[i] - if DEBUG >= 5: - print("decoder_layer before") - print(f"decoder_layer: {decoder_layer}") - print(f"hidden_states: {self.hidden_states}") - - # TODO: fix caching as decoder layer is not returning - # present_key_value from attention layer on models - # might have some other generation functions needed to do it - # see https://github.com/huggingface/transformers/blob/main/src/transformers/generation/utils.py#L2917 - # for qwen2 exhttps://github.com/huggingface/transformers/blob/main/src/transformers/models/qwen2/modeling_qwen2.py#L291 - layer_outputs = decoder_layer( - self.hidden_states, - attention_mask=self.causal_mask, - position_ids=self.position_ids, - past_key_values=self.past_key_values, - use_cache=True, - cache_position=self.cache_position - ) - - self.hidden_states = layer_outputs[0] - self.next_decoder_cache = layer_outputs[1] - - if DEBUG >= 5: - print("decoder_layer after") - print(f"layer_outputs: {layer_outputs}\n") - print(f"self.next_decoder_cache: {self.next_decoder_cache}") - print(f"hidden_states: {self.hidden_states}") - 
print(f"next_decoder_cache: {self.next_decoder_cache}") - - # handle last layer to get logits - # shard is last layer says true at the start and not detecting last layer correctly - if self.shard.is_last_layer(): - self.hidden_states = self.model.norm(self.hidden_states) - if use_legacy_cache: - self.past_key_values = self.next_decoder_cache.to_legacy_cache() - else: - self.past_key_values = self.next_decoder_cache - - # lm_head - logits = self.llm_model.lm_head(self.hidden_states).to(self.device) - - if DEBUG >= 4: - print(f"logits: {logits}") - - return ( - None, - None, - logits - ) - - if DEBUG >= 4: - print(f"hidden_states: {self.hidden_states}") - print(f"past_key_values: {self.past_key_values}") - - return ( - self.hidden_states, - self.past_key_values, - None - ) - - def logits_sample( - self, - logits: torch.tensor, - use_max: Optional[bool] = False - ) -> torch.tensor: - """ - Get a sample of the logits from end of model run for next token - - Args: - logits: tensor - use_max: bool, if function should sample with argmax - - Returns: - next_token: tensor - """ - - # get a single cloned logit - logits = logits[:, -1, :].clone().float() - - next_token_scores = self.logits_processor(self.input_ids, logits) - - if not use_max: - probs = nn.functional.softmax(next_token_scores, dim=-1) - next_token = torch.multinomial(probs, num_samples=1) - else: - next_token = torch.argmax(next_token_scores, dim=-1) - - if DEBUG >= 4: - print(f"input_ids: {self.input_ids}") - print(f"next_token: {next_token}") - - return next_token[:, None].squeeze(-1) - - + def __init__( + self, + shard: Shard, + local_model_path, + device, + dtype, + top_k: int = 25, + temp: float = 0.7, + top_p: float = 0.9, + max_length: int = 50, + max_time: float = 10.0 + ): + # class vars + self.shard = shard + self.hidden_states = None + self.input_ids = None + self.inputs_embeds = None + self.attention_mask = None + self.position_embeddings = None + self.past_key_values = None + self.cache_position = None + self.position_ids = None + self.causal_mask = None + self.local_model_path = local_model_path + + # setup logit processors + self.logits_processor = LogitsProcessorList([ + TopKLogitsWarper(top_k), + TemperatureLogitsWarper(temp), + TopPLogitsWarper(top_p) + ]) + + self.device = device + self.torch_dtype = dtype + + # setup pytorch and transformer llm + try: + self.llm_model = AutoModelForCausalLM.from_pretrained( + pretrained_model_name_or_path=self.local_model_path, + torch_dtype=self.torch_dtype, + device_map="auto", + offload_buffers=True + ) + + self.model = self.llm_model.model + except Exception as err: + print(f"error loading and splitting model: {err}") + raise + + + def forward( + self, + input_ids: Optional[torch.tensor] = None, + hidden_states: Optional[torch.tensor] = None, + attention_mask: Optional[torch.tensor] = None, + past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + use_legacy_cache: bool = False + ) -> Tuple[Optional[torch.tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.tensor]]: + + """ + Generate hidden states or logits via passing through set amount of layers of a model + To be passed only input_ids OR hidden_state and not both. 
This is for connecting the model + layer to generate a complete output + + Args: + model: base llm model tramsformers class + llm_model: llm chat model class + input_ids: tensor optional + attention_mask: tensor optional + past_key_values: Cache or list[tensor] optional + use_legacy_cache: bool optional + infer_tensor: bool optional, lets forward know to handle tensors + + Returns: + Tuple of + - hidden_states: tensor optional + - past_key_values: Cache or list[tensor] optional + - logits: tensor Optional + + """ + model_inputs = None + self.hidden_states = hidden_states + self.input_ids = input_ids + + # embed input_ids + self.inputs_embeds = self.model.embed_tokens(self.input_ids) + + # cache + if past_key_values and not isinstance(past_key_values, Cache): + use_legacy_cache = True + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + + past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + self.inputs_embeds.shape[1], + device=self.inputs_embeds.device + ) + + # position id + position_ids = cache_position.unsqueeze(0) + + # casual mask and attention_mask + self.attention_mask = attention_mask + self.causal_mask = self.model._update_causal_mask( + None, + self.inputs_embeds, + cache_position, + past_key_values, + False # dont out attentions + ) + + # embed positions, some models require and some dont + if isinstance(self.model, LlamaModel): + self.position_embeddings = self.model.rotary_emb( + self.inputs_embeds, + position_ids + ) + + # prepare inputs for decoder layers + model_inputs = self.llm_model.prepare_inputs_for_generation( + self.input_ids, + past_key_values=past_key_values, + attention_mask=self.attention_mask, + inputs_embeds=self.inputs_embeds, + position_ids=position_ids, + cache_position=cache_position + ) + + self.hidden_states = self.inputs_embeds + self.position_ids = model_inputs["position_ids"] + self.cache_position = model_inputs["cache_position"] + self.past_key_values = model_inputs["past_key_values"] + + if DEBUG >= 4: + print(f"model_inputs: {model_inputs}") + + # run through decoder layers + layer_amt = range(self.shard.start_layer, self.shard.end_layer + 1) + + if DEBUG >= 4: + print(f"hidden_states: {self.hidden_states}") + print(f"layer_amt: {layer_amt}") + + for i in layer_amt: + decoder_layer = self.model.layers[i] + if DEBUG >= 5: + print("decoder_layer before") + print(f"decoder_layer: {decoder_layer}") + print(f"hidden_states: {self.hidden_states}") + print(f"position_ids: {self.position_ids}") + print(f"position_embeddings: {self.position_embeddings}") + + # TODO: fix caching as decoder layer is not returning + # present_key_value from attention layer on models + # might have some other generation functions needed to do it + # see https://github.com/huggingface/transformers/blob/main/src/transformers/generation/utils.py#L2917 + # for qwen2 exhttps://github.com/huggingface/transformers/blob/main/src/transformers/models/qwen2/modeling_qwen2.py#L291 + layer_outputs = decoder_layer( + self.hidden_states, + attention_mask=self.causal_mask, + position_ids=self.position_ids, + past_key_values=self.past_key_values, + use_cache=True, + cache_position=self.cache_position + ) + + self.hidden_states = layer_outputs[0] + self.next_decoder_cache = layer_outputs[1] + + if DEBUG >= 5: + print("decoder_layer after") + print(f"layer_outputs: {layer_outputs}\n") + print(f"self.next_decoder_cache: {self.next_decoder_cache}") + print(f"hidden_states: 
{self.hidden_states}") + print(f"next_decoder_cache: {self.next_decoder_cache}") + + # handle last layer to get logits + # shard is last layer says true at the start and not detecting last layer correctly + if self.shard.is_last_layer(): + self.hidden_states = self.model.norm(self.hidden_states) + if use_legacy_cache: + self.past_key_values = self.next_decoder_cache.to_legacy_cache() + else: + self.past_key_values = self.next_decoder_cache + + # lm_head + logits = self.llm_model.lm_head(self.hidden_states).to(self.device) + + if DEBUG >= 4: + print(f"logits: {logits}") + + return ( + None, + None, + logits + ) + + if DEBUG >= 4: + print(f"hidden_states: {self.hidden_states}") + print(f"past_key_values: {self.past_key_values}") + + return ( + self.hidden_states, + self.past_key_values, + None + ) + + def logits_sample( + self, + logits: torch.tensor, + use_max: Optional[bool] = False + ) -> torch.tensor: + """ + Get a sample of the logits from end of model run for next token + + Args: + logits: tensor + use_max: bool, if function should sample with argmax + + Returns: + next_token: tensor + """ + + # get a single cloned logit + logits = logits[:, -1, :].clone().float() + + next_token_scores = self.logits_processor(self.input_ids, logits) + + if not use_max: + probs = nn.functional.softmax(next_token_scores, dim=-1) + next_token = torch.multinomial(probs, num_samples=1) + else: + next_token = torch.argmax(next_token_scores, dim=-1) + + if DEBUG >= 4: + print(f"input_ids: {self.input_ids}") + print(f"next_token: {next_token}") + + return next_token[:, None].squeeze(-1) From 4e356f8dac26f7378e276807fabd2fa773b540b8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 18:20:23 -0800 Subject: [PATCH 379/589] loading cached input_ids when passing hidden states --- exo/inference/pytorch/inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 8bbc25798..157609942 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -194,6 +194,7 @@ async def infer_tensor( self.past_input_ids = None if input_ids.size()[-1] > 1: hidden_states = input_ids + self.past_input_ids = past_iids else: if past_iids is not None: self.past_input_ids = past_iids From a5ef04a9ecdb3f9185f665654c19118240af85b5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 18:33:43 -0800 Subject: [PATCH 380/589] loading cached iids from infer state fix --- exo/inference/pytorch/inference.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 157609942..6a14f1f7f 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -105,7 +105,7 @@ async def infer_prompt( image_str: Optional[str] = None, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: - if DEBUG >= 4: + if DEBUG >= 2: print("infer_prompt called") print(f"prompt: {prompt}") print(f"shard: {shard}") @@ -126,7 +126,7 @@ async def infer_prompt( else: self.past_input_ids = input_ids - if DEBUG >= 4: + if DEBUG >= 2: print(f"past_input_ids: {self.past_input_ids}\n") shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( @@ -134,7 +134,7 @@ async def infer_prompt( attention_mask=input_attention_mask ) - if DEBUG >= 4: + if DEBUG >= 2: print(f"\nshard_hidden_states: {shard_hidden_states}\n") print(f"\nshard_past_kvs {shard_past_kvs}\n") 
print(f"\nshard_logits: {shard_logits}") @@ -152,7 +152,7 @@ async def infer_prompt( if next_token is not None: is_finished = next_token.item() == self.tokenizer.eos_token_id - if DEBUG >= 4: + if DEBUG >= 2: print(f"\ninput_ids: {input_ids}") print(f"\nshard_hidden_states: {shard_hidden_states}\n") print(f"\nshard_past_kvs {shard_past_kvs}\n") @@ -164,7 +164,7 @@ async def infer_prompt( is_finished ) - if DEBUG >= 4: + if DEBUG >= 2: print(f"return_values: {return_values}") return return_values @@ -176,7 +176,7 @@ async def infer_tensor( input_data: np.ndarray, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: - if DEBUG >= 4: + if DEBUG >= 2: print("infer_tensor called") print(f"input_data: {input_data}") print(f"shard: {shard}") @@ -194,15 +194,16 @@ async def infer_tensor( self.past_input_ids = None if input_ids.size()[-1] > 1: hidden_states = input_ids - self.past_input_ids = past_iids + self.past_input_ids = torch.tensor(cached_iids["input_ids"]) else: if past_iids is not None: self.past_input_ids = past_iids else: self.past_input_ids = input_ids - if DEBUG >= 4: + if DEBUG >= 2: print(f"input_ids: {input_ids}") + print(f"hidden_state: {hidden_states}") print(f"inference_state: {inference_state}") shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( @@ -236,7 +237,7 @@ async def infer_tensor( # clear cache cached_iids = {"input_ids": []} - if DEBUG >= 4: + if DEBUG >= 2: print(f"\ninput_ids: {input_ids}") print(f"\nshard_hidden_states: {shard_hidden_states}\n") print(f"\nshard_past_kvs {shard_past_kvs}\n") @@ -248,7 +249,7 @@ async def infer_tensor( is_finished ) - if DEBUG >= 4: + if DEBUG >= 2: print(f"return_values: {return_values}") return return_values @@ -264,11 +265,11 @@ async def ensure_shard(self, shard: Shard): if self.shard == shard: return - if DEBUG >= 4: + if DEBUG >= 2: print(f"Loading new shard: {shard}") model_path = await self.shard_downloader.ensure_shard(shard) - if DEBUG >= 4: + if DEBUG >= 2: print(f"model_path: {model_path}") self.stateful_sharded_model = ShardedHuggingFaceModel( From e888baa1537c98cd450ee28db29c83bc4e7320be Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 18:35:12 -0800 Subject: [PATCH 381/589] device fix --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 6a14f1f7f..fbf99a6ee 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -194,7 +194,7 @@ async def infer_tensor( self.past_input_ids = None if input_ids.size()[-1] > 1: hidden_states = input_ids - self.past_input_ids = torch.tensor(cached_iids["input_ids"]) + self.past_input_ids = torch.tensor(cached_iids["input_ids"]).to(self.device) else: if past_iids is not None: self.past_input_ids = past_iids From 7d9eb17d22a832dccb359ba48bd08cd650dc33ec Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 18:42:45 -0800 Subject: [PATCH 382/589] position id fix --- exo/inference/pytorch/model/hf.py | 91 ++++++++++++++++--------------- 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 547ed040f..3f88afb24 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -104,58 +104,59 @@ def forward( self.hidden_states = hidden_states self.input_ids = input_ids - # embed input_ids - self.inputs_embeds = 
self.model.embed_tokens(self.input_ids) - - # cache - if past_key_values and not isinstance(past_key_values, Cache): - use_legacy_cache = True - past_key_values = DynamicCache.from_legacy_cache(past_key_values) - - past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + self.inputs_embeds.shape[1], - device=self.inputs_embeds.device - ) + if self.hidden_states is None or self.position_ids is None: + # embed input_ids + self.inputs_embeds = self.model.embed_tokens(self.input_ids) + + # cache + if past_key_values and not isinstance(past_key_values, Cache): + use_legacy_cache = True + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + + past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + self.inputs_embeds.shape[1], + device=self.inputs_embeds.device + ) - # position id - position_ids = cache_position.unsqueeze(0) - - # casual mask and attention_mask - self.attention_mask = attention_mask - self.causal_mask = self.model._update_causal_mask( - None, - self.inputs_embeds, - cache_position, - past_key_values, - False # dont out attentions - ) + # position id + position_ids = cache_position.unsqueeze(0) - # embed positions, some models require and some dont - if isinstance(self.model, LlamaModel): - self.position_embeddings = self.model.rotary_emb( + # casual mask and attention_mask + self.attention_mask = attention_mask + self.causal_mask = self.model._update_causal_mask( + None, self.inputs_embeds, - position_ids + cache_position, + past_key_values, + False # dont out attentions ) - # prepare inputs for decoder layers - model_inputs = self.llm_model.prepare_inputs_for_generation( - self.input_ids, - past_key_values=past_key_values, - attention_mask=self.attention_mask, - inputs_embeds=self.inputs_embeds, - position_ids=position_ids, - cache_position=cache_position - ) + # embed positions, some models require and some dont + if isinstance(self.model, LlamaModel): + self.position_embeddings = self.model.rotary_emb( + self.inputs_embeds, + position_ids + ) + + # prepare inputs for decoder layers + model_inputs = self.llm_model.prepare_inputs_for_generation( + self.input_ids, + past_key_values=past_key_values, + attention_mask=self.attention_mask, + inputs_embeds=self.inputs_embeds, + position_ids=position_ids, + cache_position=cache_position + ) - self.hidden_states = self.inputs_embeds - self.position_ids = model_inputs["position_ids"] - self.cache_position = model_inputs["cache_position"] - self.past_key_values = model_inputs["past_key_values"] + self.hidden_states = self.inputs_embeds + self.position_ids = model_inputs["position_ids"] + self.cache_position = model_inputs["cache_position"] + self.past_key_values = model_inputs["past_key_values"] - if DEBUG >= 4: - print(f"model_inputs: {model_inputs}") + if DEBUG >= 4: + print(f"model_inputs: {model_inputs}") # run through decoder layers layer_amt = range(self.shard.start_layer, self.shard.end_layer + 1) From 11986289e7095d4229e4ddd7d0330817b3aecbcd Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 19:00:11 -0800 Subject: [PATCH 383/589] fixing inference instance state issues between nodes --- exo/inference/pytorch/inference.py | 4 ++-- exo/inference/pytorch/model/hf.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/exo/inference/pytorch/inference.py 
b/exo/inference/pytorch/inference.py index fbf99a6ee..03995975d 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -194,7 +194,7 @@ async def infer_tensor( self.past_input_ids = None if input_ids.size()[-1] > 1: hidden_states = input_ids - self.past_input_ids = torch.tensor(cached_iids["input_ids"]).to(self.device) + self.past_input_ids = past_iids else: if past_iids is not None: self.past_input_ids = past_iids @@ -202,7 +202,7 @@ async def infer_tensor( self.past_input_ids = input_ids if DEBUG >= 2: - print(f"input_ids: {input_ids}") + print(f"past_input_ids: {self.past_input_ids}") print(f"hidden_state: {hidden_states}") print(f"inference_state: {inference_state}") diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 3f88afb24..786899e2c 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -104,6 +104,12 @@ def forward( self.hidden_states = hidden_states self.input_ids = input_ids + if DEBUG >= 2: + print("hf forward called") + print(f"hidden_states: {self.hidden_states}") + print(f"input_ids: {self.input_ids}") + print(f"self.position_ids: {self.position_ids}") + if self.hidden_states is None or self.position_ids is None: # embed input_ids self.inputs_embeds = self.model.embed_tokens(self.input_ids) From d25b7ac0398b44b5f0aed51d9922b70513af96fd Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 19:38:31 -0800 Subject: [PATCH 384/589] node testing --- exo/models.py | 2 +- exo/tinychat/index.html | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/models.py b/exo/models.py index 29e9a7d6e..7d6bc30ad 100644 --- a/exo/models.py +++ b/exo/models.py @@ -67,7 +67,7 @@ "qwen-2.5-math-72b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Math-72B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), }, - "Qwen2-0.5B-Instruct": { + "qwen2-0.5b-instruct": { "PyTorchDynamicShardInferenceEngine": Shard(model_id="Qwen/Qwen2-0.5B-Instruct", start_layer=0, end_layer=0, n_layers=24), }, } diff --git a/exo/tinychat/index.html b/exo/tinychat/index.html index 706255bce..e9be92184 100644 --- a/exo/tinychat/index.html +++ b/exo/tinychat/index.html @@ -50,6 +50,7 @@ +
Date: Sun, 6 Oct 2024 20:26:38 -0800 Subject: [PATCH 386/589] node inference fix --- exo/inference/pytorch/model/hf.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 80ec8f78a..d5f8f68b8 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -109,8 +109,15 @@ def forward( print(f"hidden_states: {self.hidden_states}") print(f"input_ids: {self.input_ids}") print(f"self.position_ids: {self.position_ids}") + print(f"past_key_values: {past_key_values}") + + # skip if there is a hidden state with position_ids already calculated + # if there is hidden states and no position_ids, will need to be calculated + # this is not needed for Qwen model but Llama requires it + if (self.hidden_states is None or + (self.hidden_states is not None and self.position_ids is None) + ): - if self.hidden_states is None: # embed input_ids self.inputs_embeds = self.model.embed_tokens(self.input_ids) @@ -228,6 +235,8 @@ def forward( if DEBUG >= 4: print(f"hidden_states: {self.hidden_states}") print(f"past_key_values: {self.past_key_values}") + print(f"position_ids: {self.position_ids}") + print(f"input_ids: {self.input_ids}") return ( self.hidden_states, From 77a52a57eefd789e094a550a7a6d9640a67c844d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 20:33:04 -0800 Subject: [PATCH 387/589] node inference fix --- exo/inference/pytorch/model/hf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index d5f8f68b8..5ba8615a7 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -232,7 +232,8 @@ def forward( logits ) - if DEBUG >= 4: + if DEBUG >= 2: + print("hf out [no logit]") print(f"hidden_states: {self.hidden_states}") print(f"past_key_values: {self.past_key_values}") print(f"position_ids: {self.position_ids}") From 2b3397f459e22bf91423bba36324ed74ce6d4236 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 21:07:21 -0800 Subject: [PATCH 388/589] node inference fix --- exo/inference/pytorch/inference.py | 2 +- exo/inference/pytorch/model/hf.py | 38 ++++++++++++++---------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 03995975d..971eaa7d3 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -194,7 +194,7 @@ async def infer_tensor( self.past_input_ids = None if input_ids.size()[-1] > 1: hidden_states = input_ids - self.past_input_ids = past_iids + #self.past_input_ids = past_iids else: if past_iids is not None: self.past_input_ids = past_iids diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 5ba8615a7..f73a97129 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -114,28 +114,26 @@ def forward( # skip if there is a hidden state with position_ids already calculated # if there is hidden states and no position_ids, will need to be calculated # this is not needed for Qwen model but Llama requires it - if (self.hidden_states is None or - (self.hidden_states is not None and self.position_ids is None) - ): - - # embed input_ids - self.inputs_embeds = self.model.embed_tokens(self.input_ids) - - # cache - if past_key_values and not isinstance(past_key_values, Cache): - use_legacy_cache = True - past_key_values = 
DynamicCache.from_legacy_cache(past_key_values) - - past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + self.inputs_embeds.shape[1], - device=self.inputs_embeds.device - ) - # position id - position_ids = cache_position.unsqueeze(0) + # embed input_ids + self.inputs_embeds = self.model.embed_tokens(self.input_ids) + + # cache + if past_key_values and not isinstance(past_key_values, Cache): + use_legacy_cache = True + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + + past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + self.inputs_embeds.shape[1], + device=self.inputs_embeds.device + ) + + # position id + position_ids = cache_position.unsqueeze(0) + if self.hidden_states is None: # casual mask and attention_mask self.attention_mask = attention_mask self.causal_mask = self.model._update_causal_mask( From 2e588afaa4493adc6fc3c2ccb4eb6be77a282939 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 21:10:18 -0800 Subject: [PATCH 389/589] node inference fix --- exo/inference/pytorch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 971eaa7d3..03995975d 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -194,7 +194,7 @@ async def infer_tensor( self.past_input_ids = None if input_ids.size()[-1] > 1: hidden_states = input_ids - #self.past_input_ids = past_iids + self.past_input_ids = past_iids else: if past_iids is not None: self.past_input_ids = past_iids From e2eba0592aecdc205dd087c571a1384987617fef Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 21:13:11 -0800 Subject: [PATCH 390/589] node inference fix --- exo/inference/pytorch/model/hf.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index f73a97129..872ea7e2c 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -104,14 +104,6 @@ def forward( self.hidden_states = hidden_states self.input_ids = input_ids - if DEBUG >= 2: - print("hf forward called") - print(f"hidden_states: {self.hidden_states}") - print(f"input_ids: {self.input_ids}") - print(f"self.position_ids: {self.position_ids}") - print(f"past_key_values: {past_key_values}") - - # skip if there is a hidden state with position_ids already calculated # if there is hidden states and no position_ids, will need to be calculated # this is not needed for Qwen model but Llama requires it @@ -133,6 +125,14 @@ def forward( # position id position_ids = cache_position.unsqueeze(0) + if DEBUG >= 2: + print("hf forward called") + print(f"hidden_states: {self.hidden_states}") + print(f"input_ids: {self.input_ids}") + print(f"self.position_ids: {self.position_ids}") + print(f"past_key_values: {past_key_values}") + + if self.hidden_states is None: # casual mask and attention_mask self.attention_mask = attention_mask From d7699ebaf939da9f660dc39de983d399ef155d84 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 21:16:23 -0800 Subject: [PATCH 391/589] node inference fix --- exo/inference/pytorch/model/hf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py 
b/exo/inference/pytorch/model/hf.py index 872ea7e2c..eb00e9332 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -123,13 +123,14 @@ def forward( ) # position id - position_ids = cache_position.unsqueeze(0) + self.position_ids = cache_position.unsqueeze(0) if DEBUG >= 2: print("hf forward called") print(f"hidden_states: {self.hidden_states}") print(f"input_ids: {self.input_ids}") - print(f"self.position_ids: {self.position_ids}") + print(f"input_embeds: {self.inputs_embeds}") + print(f"position_ids: {self.position_ids}") print(f"past_key_values: {past_key_values}") @@ -148,7 +149,7 @@ def forward( if isinstance(self.model, LlamaModel): self.position_embeddings = self.model.rotary_emb( self.inputs_embeds, - position_ids + self.position_ids ) # prepare inputs for decoder layers @@ -157,7 +158,7 @@ def forward( past_key_values=past_key_values, attention_mask=self.attention_mask, inputs_embeds=self.inputs_embeds, - position_ids=position_ids, + position_ids=self.position_ids, cache_position=cache_position ) From bd9bf4f1afd3046730fc534ccadcc43bc066d67e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 6 Oct 2024 21:28:38 -0800 Subject: [PATCH 392/589] inference between nodes fixed by always calculating position id and input embed from input_ids cache, working on vram mem management --- exo/inference/pytorch/model/hf.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index eb00e9332..9c30d8aef 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -72,13 +72,12 @@ def __init__( def forward( self, - input_ids: Optional[torch.tensor] = None, - hidden_states: Optional[torch.tensor] = None, - attention_mask: Optional[torch.tensor] = None, + input_ids: Optional[torch.Tensor] = None, + hidden_states: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, use_legacy_cache: bool = False - ) -> Tuple[Optional[torch.tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.tensor]]: - + ) -> Tuple[Optional[torch.Tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.Tensor]]: """ Generate hidden states or logits via passing through set amount of layers of a model To be passed only input_ids OR hidden_state and not both. 
This is for connecting the model @@ -125,7 +124,7 @@ def forward( # position id self.position_ids = cache_position.unsqueeze(0) - if DEBUG >= 2: + if DEBUG >= 4: print("hf forward called") print(f"hidden_states: {self.hidden_states}") print(f"input_ids: {self.input_ids}") @@ -231,7 +230,7 @@ def forward( logits ) - if DEBUG >= 2: + if DEBUG >= 4: print("hf out [no logit]") print(f"hidden_states: {self.hidden_states}") print(f"past_key_values: {self.past_key_values}") @@ -246,18 +245,18 @@ def forward( def logits_sample( self, - logits: torch.tensor, + logits: torch.Tensor, use_max: Optional[bool] = False - ) -> torch.tensor: + ) -> torch.Tensor: """ Get a sample of the logits from end of model run for next token - + Args: - logits: tensor + logits: tensor use_max: bool, if function should sample with argmax Returns: - next_token: tensor + next_token: tensor """ # get a single cloned logit @@ -273,6 +272,6 @@ def logits_sample( if DEBUG >= 4: print(f"input_ids: {self.input_ids}") - print(f"next_token: {next_token}") + print(f"next_token: {next_token}") return next_token[:, None].squeeze(-1) From 913a00859e284aae186d8e1867f2afb3b0b26bfc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 7 Oct 2024 08:10:01 -0800 Subject: [PATCH 393/589] cleaning up code --- exo/inference/pytorch/inference.py | 56 +++++++++++++----------------- exo/inference/pytorch/model/hf.py | 19 +++++----- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 03995975d..11f8eddb3 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -34,7 +34,7 @@ def __init__(self, shard_downloader: HFShardDownloader): Initialize the inference engine. Args: - debug (bool): If True, enables debug logging. Defaults to False. 
+ shard_downloader: Model and weights sharding download """ self.shard = None self.shard_downloader = shard_downloader @@ -49,15 +49,15 @@ def __init__(self, shard_downloader: HFShardDownloader): # setup cuda device if os.environ.get("PYTORCH_DEVICE"): pytorch_device = os.environ["PYTOCH_DEVICE"] - if pytorch_device not in ["cuda", "mps", "cpu"]: + if pytorch_device not in ["cuda", "mps"]: pytorch_device = "cpu" self.device = pytorch_device - self.torch_dtype = torch.float32 if pytorch_device != "cpu" else torch.float16 + self.torch_dtype = torch.float16 if pytorch_device != "cpu" else torch.float32 if torch.cuda.is_available(): self.device = torch.device("cuda") - self.torch_dtype = torch.float16 + self.torch_dtype = torch.float32 elif torch.backends.mps.is_available(): self.device = torch.device("mps") self.torch_dtype = torch.float32 @@ -101,11 +101,11 @@ async def infer_prompt( self, request_id: str, shard: Shard, - prompt: str, - image_str: Optional[str] = None, + prompt: str, + image_str: Optional[str] = None, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: - if DEBUG >= 2: + if DEBUG >= 4: print("infer_prompt called") print(f"prompt: {prompt}") print(f"shard: {shard}") @@ -115,26 +115,25 @@ async def infer_prompt( inputs = self.tokenizer([prompt], return_tensors="pt") input_ids = inputs.input_ids.to(self.device) - input_attention_mask = inputs.attention_mask.to(self.device) - batch_size, seq_length = input_ids.shape[:2] + input_attention_mask = inputs.attention_mask.to(self.device) # get cache from inference_state past_iids, cached_iids = self.infer_caching(inference_state) if past_iids is not None: - self.past_input_ids = past_iids, + self.past_input_ids = past_iids else: self.past_input_ids = input_ids - if DEBUG >= 2: + if DEBUG >= 4: print(f"past_input_ids: {self.past_input_ids}\n") - + shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( input_ids=self.past_input_ids, attention_mask=input_attention_mask ) - if DEBUG >= 2: + if DEBUG >= 4: print(f"\nshard_hidden_states: {shard_hidden_states}\n") print(f"\nshard_past_kvs {shard_past_kvs}\n") print(f"\nshard_logits: {shard_logits}") @@ -152,7 +151,7 @@ async def infer_prompt( if next_token is not None: is_finished = next_token.item() == self.tokenizer.eos_token_id - if DEBUG >= 2: + if DEBUG >= 4: print(f"\ninput_ids: {input_ids}") print(f"\nshard_hidden_states: {shard_hidden_states}\n") print(f"\nshard_past_kvs {shard_past_kvs}\n") @@ -164,7 +163,7 @@ async def infer_prompt( is_finished ) - if DEBUG >= 2: + if DEBUG >= 4: print(f"return_values: {return_values}") return return_values @@ -176,7 +175,7 @@ async def infer_tensor( input_data: np.ndarray, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: - if DEBUG >= 2: + if DEBUG >= 4: print("infer_tensor called") print(f"input_data: {input_data}") print(f"shard: {shard}") @@ -201,7 +200,7 @@ async def infer_tensor( else: self.past_input_ids = input_ids - if DEBUG >= 2: + if DEBUG >= 4: print(f"past_input_ids: {self.past_input_ids}") print(f"hidden_state: {hidden_states}") print(f"inference_state: {inference_state}") @@ -211,22 +210,18 @@ async def infer_tensor( hidden_states=hidden_states ) - hidden_dict = None - if shard_hidden_states is not None: - hidden_dict = {"hidden_states": shard_hidden_states.tolist()} - - next_token = None + next_token = None if shard_logits is not None: next_token = self.stateful_sharded_model.logits_sample(shard_logits) input_ids = next_token - + #cache if next_token is 
not None: if self.past_input_ids is not None: next_cached_logits = torch.cat([self.past_input_ids, next_token], dim=-1).to(self.device) elif past_iids is not None: next_cached_logits = torch.cat([past_iids, next_token], dim=-1).to(self.device) - + cached_iids = {"input_ids": next_cached_logits.tolist()} is_finished = False @@ -234,10 +229,10 @@ async def infer_tensor( is_finished = next_token.item() == self.tokenizer.eos_token_id if is_finished: - # clear cache + # clear cache cached_iids = {"input_ids": []} - if DEBUG >= 2: + if DEBUG >= 4: print(f"\ninput_ids: {input_ids}") print(f"\nshard_hidden_states: {shard_hidden_states}\n") print(f"\nshard_past_kvs {shard_past_kvs}\n") @@ -245,16 +240,15 @@ async def infer_tensor( return_values = ( input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps({"cached_iids": cached_iids}), + json.dumps({"cached_iids": cached_iids}), is_finished ) - if DEBUG >= 2: + if DEBUG >= 4: print(f"return_values: {return_values}") return return_values - async def ensure_shard(self, shard: Shard): """ Ensure the model shard is loaded and ready for inference. @@ -265,12 +259,10 @@ async def ensure_shard(self, shard: Shard): if self.shard == shard: return - if DEBUG >= 2: + if DEBUG >= 4: print(f"Loading new shard: {shard}") model_path = await self.shard_downloader.ensure_shard(shard) - if DEBUG >= 2: - print(f"model_path: {model_path}") self.stateful_sharded_model = ShardedHuggingFaceModel( shard=shard, diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/pytorch/model/hf.py index 9c30d8aef..d6038f043 100644 --- a/exo/inference/pytorch/model/hf.py +++ b/exo/inference/pytorch/model/hf.py @@ -1,6 +1,5 @@ import torch import torch.nn as nn -import numpy as np from typing import Tuple, Optional, Union, List from exo.inference.shard import Shard @@ -69,7 +68,6 @@ def __init__( print(f"error loading and splitting model: {err}") raise - def forward( self, input_ids: Optional[torch.Tensor] = None, @@ -84,8 +82,8 @@ def forward( layer to generate a complete output Args: - model: base llm model tramsformers class - llm_model: llm chat model class + model: base llm model tramsformers class + llm_model: llm chat model class input_ids: tensor optional attention_mask: tensor optional past_key_values: Cache or list[tensor] optional @@ -95,7 +93,7 @@ def forward( Returns: Tuple of - hidden_states: tensor optional - - past_key_values: Cache or list[tensor] optional + - past_key_values: Cache or list[tensor] optional - logits: tensor Optional """ @@ -132,7 +130,6 @@ def forward( print(f"position_ids: {self.position_ids}") print(f"past_key_values: {past_key_values}") - if self.hidden_states is None: # casual mask and attention_mask self.attention_mask = attention_mask @@ -169,7 +166,7 @@ def forward( if DEBUG >= 4: print(f"model_inputs: {model_inputs}") - # run through decoder layers + # run through decoder layers layer_amt = range(self.shard.start_layer, self.shard.end_layer + 1) if DEBUG >= 4: @@ -186,8 +183,8 @@ def forward( print(f"position_embeddings: {self.position_embeddings}") # TODO: fix caching as decoder layer is not returning - # present_key_value from attention layer on models - # might have some other generation functions needed to do it + # present_key_value from attention layer on models + # might have some other generation functions needed to do it # see https://github.com/huggingface/transformers/blob/main/src/transformers/generation/utils.py#L2917 # for qwen2 
exhttps://github.com/huggingface/transformers/blob/main/src/transformers/models/qwen2/modeling_qwen2.py#L291 layer_outputs = decoder_layer( @@ -217,8 +214,8 @@ def forward( self.past_key_values = self.next_decoder_cache.to_legacy_cache() else: self.past_key_values = self.next_decoder_cache - - # lm_head + + # lm_head logits = self.llm_model.lm_head(self.hidden_states).to(self.device) if DEBUG >= 4: From b518f73fcf4e803431b619a50012226aa7b92e78 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 7 Oct 2024 12:06:29 -0800 Subject: [PATCH 394/589] comma and other text issue fix --- exo/api/chatgpt_api.py | 9 +-------- exo/inference/pytorch/inference.py | 1 + 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py index fe8cc5906..9a65deaec 100644 --- a/exo/api/chatgpt_api.py +++ b/exo/api/chatgpt_api.py @@ -71,16 +71,9 @@ def generate_completion( } choice = completion["choices"][0] - print(f"\nchoice {choice}") if object_type.startswith("chat.completion"): key_name = "delta" if stream else "message" - - token_decode = tokenizer.batch_decode( - tokens, - skip_special_tokens=True, - clean_up_tokenization_spaces=False - ) - choice[key_name] = {"role": "assistant", "content": token_decode} + choice[key_name] = {"role": "assistant", "content": tokenizer.decode(tokens)} elif object_type == "text_completion": choice["text"] = tokenizer.decode(tokens) else: diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 11f8eddb3..676e31620 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -14,6 +14,7 @@ from exo.download.hf.hf_shard_download import HFShardDownloader from transformers import AutoTokenizer + # llama from transformers.models.llama.modeling_llama import LlamaModel From 9d2477952769a3415c60350e6291d12567ba8aef Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 8 Oct 2024 23:17:26 -0800 Subject: [PATCH 395/589] adding threadpooling to forward and logit sampling --- exo/inference/pytorch/inference.py | 66 ++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index 676e31620..a613015ed 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -1,11 +1,14 @@ # experimental, based off of tinygrad/inference.py +import asyncio import os import re import numpy as np import torch import json +import functools +from concurrent.futures import ThreadPoolExecutor -from typing import Optional, Tuple +from typing import Optional, Tuple, Union, List from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel @@ -13,7 +16,7 @@ from exo.helpers import DEBUG from exo.download.hf.hf_shard_download import HFShardDownloader -from transformers import AutoTokenizer +from transformers import AutoTokenizer, Cache # llama from transformers.models.llama.modeling_llama import LlamaModel @@ -39,8 +42,6 @@ def __init__(self, shard_downloader: HFShardDownloader): """ self.shard = None self.shard_downloader = shard_downloader - self.stateful_sharded_model = None - self.tokenizer = None # the whole history with new logits need to # be passed to the model to reach the end token @@ -59,15 +60,15 @@ def __init__(self, shard_downloader: HFShardDownloader): if torch.cuda.is_available(): self.device = torch.device("cuda") self.torch_dtype = torch.float32 
- elif torch.backends.mps.is_available(): + elif torch.backends.mps.is_available() and torch.backends.mps.is_built(): self.device = torch.device("mps") self.torch_dtype = torch.float32 else: self.device = torch.device("cpu") self.torch_dtype = torch.float16 - # setup unfinished sequence - self.unfinished_sequences = torch.ones(1, dtype=torch.long, device=self.device) + # setup threadding + torch.set_num_threads(torch.get_num_threads()) def infer_caching( self, @@ -98,6 +99,44 @@ def infer_caching( return (past_iids, cached_iids) + async def async_forward( + self, + input_ids: Optional[torch.Tensor] = None, + hidden_states: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None + ) -> Tuple[Optional[torch.Tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.Tensor]]: + + loop = asyncio.get_running_loop() + + forward_partial = functools.partial( + self.stateful_sharded_model.forward, + input_ids=input_ids, + hidden_states=hidden_states, + attention_mask=attention_mask + ) + + with ThreadPoolExecutor() as pool: + result = await loop.run_in_executor(pool, forward_partial) + + return result + + async def async_logit_sample( + self, + logits: torch.Tensor + ) -> torch.Tensor: + + loop = asyncio.get_running_loop() + + sample_partial = functools.partial( + self.stateful_sharded_model.logits_sample, + logits=logits + ) + + with ThreadPoolExecutor() as pool: + result = await loop.run_in_executor(pool, sample_partial) + + return result + async def infer_prompt( self, request_id: str, @@ -129,7 +168,7 @@ async def infer_prompt( if DEBUG >= 4: print(f"past_input_ids: {self.past_input_ids}\n") - shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( + shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( input_ids=self.past_input_ids, attention_mask=input_attention_mask ) @@ -141,7 +180,7 @@ async def infer_prompt( next_token = None if shard_logits is not None: - next_token = self.stateful_sharded_model.logits_sample(shard_logits) + next_token = await self.async_logit_sample(shard_logits) self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) input_ids = next_token @@ -206,24 +245,27 @@ async def infer_tensor( print(f"hidden_state: {hidden_states}") print(f"inference_state: {inference_state}") - shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( + shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( input_ids=self.past_input_ids, hidden_states=hidden_states ) next_token = None if shard_logits is not None: - next_token = self.stateful_sharded_model.logits_sample(shard_logits) + next_token = await self.async_logit_sample(shard_logits) input_ids = next_token #cache + next_cached_logits = None if next_token is not None: if self.past_input_ids is not None: next_cached_logits = torch.cat([self.past_input_ids, next_token], dim=-1).to(self.device) elif past_iids is not None: next_cached_logits = torch.cat([past_iids, next_token], dim=-1).to(self.device) - cached_iids = {"input_ids": next_cached_logits.tolist()} + cached_iids = { + "input_ids": next_cached_logits.tolist() if next_cached_logits is not None else [] + } is_finished = False if next_token is not None: From d4fb74fa7e8a09348f56f0b237cd492179e30cac Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Thu, 10 Oct 2024 13:09:58 -0700 Subject: [PATCH 396/589] rename (PyTorch, pytorch) -> (Torch, torch) --- exo/inference/inference_engine.py | 6 +++--- 
exo/inference/{pytorch => torch}/.gitignore | 0 exo/inference/{pytorch => torch}/README.md | 0 exo/inference/{pytorch => torch}/__init__.py | 0 exo/inference/{pytorch => torch}/inference.py | 18 +++++++++--------- .../{pytorch => torch}/model/__init__.py | 0 exo/inference/{pytorch => torch}/model/hf.py | 0 .../{pytorch => torch}/tests/__init__.py | 0 .../tests/test_inference_engine.py | 10 +++++----- .../tests/test_simple_model.py | 0 .../tests/test_split_model.py | 0 .../{pytorch => torch}/tests/utils.py | 0 exo/models.py | 10 +++++----- 13 files changed, 22 insertions(+), 22 deletions(-) rename exo/inference/{pytorch => torch}/.gitignore (100%) rename exo/inference/{pytorch => torch}/README.md (100%) rename exo/inference/{pytorch => torch}/__init__.py (100%) rename exo/inference/{pytorch => torch}/inference.py (94%) rename exo/inference/{pytorch => torch}/model/__init__.py (100%) rename exo/inference/{pytorch => torch}/model/hf.py (100%) rename exo/inference/{pytorch => torch}/tests/__init__.py (100%) rename exo/inference/{pytorch => torch}/tests/test_inference_engine.py (90%) rename exo/inference/{pytorch => torch}/tests/test_simple_model.py (100%) rename exo/inference/{pytorch => torch}/tests/test_split_model.py (100%) rename exo/inference/{pytorch => torch}/tests/utils.py (100%) diff --git a/exo/inference/inference_engine.py b/exo/inference/inference_engine.py index 2b98adbe8..7fd7528b4 100644 --- a/exo/inference/inference_engine.py +++ b/exo/inference/inference_engine.py @@ -27,8 +27,8 @@ def get_inference_engine(inference_engine_name: str, shard_downloader: 'ShardDow tinygrad.helpers.DEBUG.value = int(os.getenv("TINYGRAD_DEBUG", default="0")) return TinygradDynamicShardInferenceEngine(shard_downloader) - elif inference_engine_name == "pytorch": - from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine - return PyTorchDynamicShardInferenceEngine(shard_downloader) + elif inference_engine_name == "torch": + from exo.inference.torch.inference import TorchDynamicShardInferenceEngine + return TorchDynamicShardInferenceEngine(shard_downloader) else: raise ValueError(f"Inference engine {inference_engine_name} not supported") diff --git a/exo/inference/pytorch/.gitignore b/exo/inference/torch/.gitignore similarity index 100% rename from exo/inference/pytorch/.gitignore rename to exo/inference/torch/.gitignore diff --git a/exo/inference/pytorch/README.md b/exo/inference/torch/README.md similarity index 100% rename from exo/inference/pytorch/README.md rename to exo/inference/torch/README.md diff --git a/exo/inference/pytorch/__init__.py b/exo/inference/torch/__init__.py similarity index 100% rename from exo/inference/pytorch/__init__.py rename to exo/inference/torch/__init__.py diff --git a/exo/inference/pytorch/inference.py b/exo/inference/torch/inference.py similarity index 94% rename from exo/inference/pytorch/inference.py rename to exo/inference/torch/inference.py index a613015ed..093724c1b 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/torch/inference.py @@ -11,7 +11,7 @@ from typing import Optional, Tuple, Union, List from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine -from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel +from exo.inference.torch.model.hf import ShardedHuggingFaceModel from exo.inference.tokenizers import resolve_tokenizer from exo.helpers import DEBUG from exo.download.hf.hf_shard_download import HFShardDownloader @@ -28,9 +28,9 @@ MAX_LENGTH = 125 MAX_TIME = 60.0 
-class PyTorchDynamicShardInferenceEngine(InferenceEngine): +class TorchDynamicShardInferenceEngine(InferenceEngine): """ - PyTorch Dynamic Shard Inference Engine for performing model inference with sharded Pytorch/HF based models. + Torch Dynamic Shard Inference Engine for performing model inference with sharded Pytorch/HF based models. """ def __init__(self, shard_downloader: HFShardDownloader): @@ -49,13 +49,13 @@ def __init__(self, shard_downloader: HFShardDownloader): self.past_input_ids = None # setup cuda device - if os.environ.get("PYTORCH_DEVICE"): - pytorch_device = os.environ["PYTOCH_DEVICE"] - if pytorch_device not in ["cuda", "mps"]: - pytorch_device = "cpu" + if os.environ.get("TORCH_DEVICE"): + torch_device = os.environ["PYTOCH_DEVICE"] + if torch_device not in ["cuda", "mps"]: + torch_device = "cpu" - self.device = pytorch_device - self.torch_dtype = torch.float16 if pytorch_device != "cpu" else torch.float32 + self.device = torch_device + self.torch_dtype = torch.float16 if torch_device != "cpu" else torch.float32 if torch.cuda.is_available(): self.device = torch.device("cuda") diff --git a/exo/inference/pytorch/model/__init__.py b/exo/inference/torch/model/__init__.py similarity index 100% rename from exo/inference/pytorch/model/__init__.py rename to exo/inference/torch/model/__init__.py diff --git a/exo/inference/pytorch/model/hf.py b/exo/inference/torch/model/hf.py similarity index 100% rename from exo/inference/pytorch/model/hf.py rename to exo/inference/torch/model/hf.py diff --git a/exo/inference/pytorch/tests/__init__.py b/exo/inference/torch/tests/__init__.py similarity index 100% rename from exo/inference/pytorch/tests/__init__.py rename to exo/inference/torch/tests/__init__.py diff --git a/exo/inference/pytorch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_inference_engine.py similarity index 90% rename from exo/inference/pytorch/tests/test_inference_engine.py rename to exo/inference/torch/tests/test_inference_engine.py index 854d9b9c9..e8b0b14c3 100644 --- a/exo/inference/pytorch/tests/test_inference_engine.py +++ b/exo/inference/torch/tests/test_inference_engine.py @@ -1,7 +1,7 @@ import asyncio from exo.inference.shard import Shard -from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine +from exo.inference.torch.inference import TorchDynamicShardInferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.inference_engine import InferenceEngine from exo.inference.shard import Shard @@ -120,8 +120,8 @@ async def test_inference_engine( # try: # print("\n\n -------- TEST QWEN2 -------- \n\n") # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # TorchDynamicShardInferenceEngine(HFShardDownloader()), + # TorchDynamicShardInferenceEngine(HFShardDownloader()), # "Qwen/Qwen2-0.5B-Instruct", # 24 # )) @@ -131,8 +131,8 @@ async def test_inference_engine( try: print("\n-------- Test meta-llama/Llama-3.2-1B-Instruct ----------\n") asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + TorchDynamicShardInferenceEngine(HFShardDownloader()), + TorchDynamicShardInferenceEngine(HFShardDownloader()), "meta-llama/Llama-3.2-1B-Instruct", 24 )) diff --git a/exo/inference/pytorch/tests/test_simple_model.py b/exo/inference/torch/tests/test_simple_model.py similarity index 100% rename from 
exo/inference/pytorch/tests/test_simple_model.py rename to exo/inference/torch/tests/test_simple_model.py diff --git a/exo/inference/pytorch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py similarity index 100% rename from exo/inference/pytorch/tests/test_split_model.py rename to exo/inference/torch/tests/test_split_model.py diff --git a/exo/inference/pytorch/tests/utils.py b/exo/inference/torch/tests/utils.py similarity index 100% rename from exo/inference/pytorch/tests/utils.py rename to exo/inference/torch/tests/utils.py diff --git a/exo/models.py b/exo/models.py index 7d6bc30ad..b6a7092b5 100644 --- a/exo/models.py +++ b/exo/models.py @@ -4,7 +4,7 @@ ### llama "llama-3.2-1b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-1B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=16), - "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), + "TorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), }, "llama-3.2-3b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-3B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), @@ -12,7 +12,7 @@ "llama-3.1-8b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), "TinygradDynamicShardInferenceEngine": Shard(model_id="mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", start_layer=0, end_layer=0, n_layers=32), - "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct", start_layer=0, end_layer=0, n_layers=32), + "TorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct", start_layer=0, end_layer=0, n_layers=32), }, "llama-3.1-70b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), @@ -32,10 +32,10 @@ "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", start_layer=0, end_layer=0, n_layers=80), }, "llama-3-2B-Base": { - "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=6), + "TorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=6), }, "llama-3-1B-Base": { - "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-1B-Base", start_layer=0, end_layer=0, n_layers=3), + "TorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-1B-Base", start_layer=0, end_layer=0, n_layers=3), }, ### mistral "mistral-nemo": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40),}, @@ -68,6 +68,6 @@ "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Math-72B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), }, "qwen2-0.5b-instruct": { - "PyTorchDynamicShardInferenceEngine": Shard(model_id="Qwen/Qwen2-0.5B-Instruct", start_layer=0, end_layer=0, n_layers=24), + "TorchDynamicShardInferenceEngine": Shard(model_id="Qwen/Qwen2-0.5B-Instruct", start_layer=0, end_layer=0, n_layers=24), }, } From edf1c3d0003b488ab27443fadf217fe554c5faa2 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Thu, 10 Oct 2024 13:13:20 -0700 Subject: [PATCH 397/589] add ci jobs for 
chatgpt_api_integration_test_torch_linux_cpu and chatgpt_api_integration_test_torch_mac --- .circleci/config.yml | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index ba5f59687..92b4d6e1a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -178,6 +178,50 @@ jobs: inference_engine: mlx model_id: llama-3.1-8b + chatgpt_api_integration_test_torch_linux_cpu: + machine: + image: ubuntu-2404:2024.08.1 + resource_class: large + steps: + - checkout + - run: + name: Set up Python + command: | + brew install python@3.12 + python3.12 -m venv env + source env/bin/activate + - run: + name: Install dependencies + command: | + source env/bin/activate + pip install --upgrade pip + pip install . + - run_chatgpt_api_test: + inference_engine: torch + model_id: llama-3.2-1b + + chatgpt_api_integration_test_torch_mac: + macos: + xcode: "15.4.0" + resource_class: macos.m1.large.gen1 + steps: + - checkout + - run: + name: Set up Python + command: | + brew install python@3.12 + python3.12 -m venv env + source env/bin/activate + - run: + name: Install dependencies + command: | + source env/bin/activate + pip install --upgrade pip + pip install . + - run_chatgpt_api_test: + inference_engine: torch + model_id: llama-3.2-1b + test_macos_m1: macos: xcode: "15.4.0" From 0fd6711723debb03524b7c0878655e16fb2942b2 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Thu, 10 Oct 2024 13:17:49 -0700 Subject: [PATCH 398/589] add ci jobs for chatgpt_api_integration_test_torch_linux_cpu and chatgpt_api_integration_test_torch_mac --- .circleci/config.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 92b4d6e1a..c9adbd2c7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -260,4 +260,6 @@ workflows: - discovery_integration_test - chatgpt_api_integration_test_mlx - test_macos_m1 + - chatgpt_api_integration_test_torch_linux_cpu + - chatgpt_api_integration_test_torch_mac # - chatgpt_api_integration_test_tinygrad \ No newline at end of file From a4feeab9bd955c178fb75ec9c8a560e9d39a1bae Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Thu, 10 Oct 2024 13:26:18 -0700 Subject: [PATCH 399/589] ci filters --- .circleci/config.yml | 66 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c9adbd2c7..e9b23b6f0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -256,10 +256,62 @@ workflows: version: 2 build_and_test: jobs: - - unit_test - - discovery_integration_test - - chatgpt_api_integration_test_mlx - - test_macos_m1 - - chatgpt_api_integration_test_torch_linux_cpu - - chatgpt_api_integration_test_torch_mac - # - chatgpt_api_integration_test_tinygrad \ No newline at end of file + - approve_run: + type: approval + requires: [] + filters: + branches: + ignore: main + - unit_test: + requires: + - approve_run + - discovery_integration_test: + requires: + - approve_run + - chatgpt_api_integration_test_mlx: + requires: + - approve_run + - test_macos_m1: + requires: + - approve_run + - chatgpt_api_integration_test_torch_linux_cpu: + requires: + - approve_run + - chatgpt_api_integration_test_torch_mac: + requires: + - approve_run + # - chatgpt_api_integration_test_tinygrad: + # requires: + # - approve_run + + # Run jobs without approval on the main branch + main_branch_workflow: + jobs: + - unit_test: + filters: + branches: + only: main + - 
discovery_integration_test: + filters: + branches: + only: main + - chatgpt_api_integration_test_mlx: + filters: + branches: + only: main + - test_macos_m1: + filters: + branches: + only: main + - chatgpt_api_integration_test_torch_linux_cpu: + filters: + branches: + only: main + - chatgpt_api_integration_test_torch_mac: + filters: + branches: + only: main + # - chatgpt_api_integration_test_tinygrad: + # filters: + # branches: + # only: main \ No newline at end of file From 55fd48247d985a9b18ef3bda992a2d6e293b6330 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Thu, 10 Oct 2024 13:34:34 -0700 Subject: [PATCH 400/589] rm comments --- .circleci/config.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e9b23b6f0..71fada3df 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -280,9 +280,6 @@ workflows: - chatgpt_api_integration_test_torch_mac: requires: - approve_run - # - chatgpt_api_integration_test_tinygrad: - # requires: - # - approve_run # Run jobs without approval on the main branch main_branch_workflow: @@ -311,7 +308,3 @@ workflows: filters: branches: only: main - # - chatgpt_api_integration_test_tinygrad: - # filters: - # branches: - # only: main \ No newline at end of file From da39519fe8735e20286d0dcb72b621811767e3da Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Thu, 10 Oct 2024 13:38:32 -0700 Subject: [PATCH 401/589] ci --- .circleci/config.yml | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 71fada3df..b14383351 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -281,7 +281,23 @@ workflows: requires: - approve_run - # Run jobs without approval on the main branch + # Workflow for forked PRs without approval + forked_pr_workflow: + jobs: + - unit_test + - discovery_integration_test + - chatgpt_api_integration_test_mlx + - test_macos_m1 + - chatgpt_api_integration_test_torch_linux_cpu + - chatgpt_api_integration_test_torch_mac + # The trigger condition ensures this workflow runs for forked PRs + triggers: + - type: pull_request + filters: + branches: + ignore: main + + # Existing workflow for main branch main_branch_workflow: jobs: - unit_test: @@ -307,4 +323,4 @@ workflows: - chatgpt_api_integration_test_torch_mac: filters: branches: - only: main + only: main \ No newline at end of file From 5eb6c34fb2e6a4e82ba680b468a662a97f5d9509 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 10 Oct 2024 17:32:51 -0800 Subject: [PATCH 402/589] fixed torch device selection --- exo/inference/pytorch/README.md | 31 +++++++----------------------- exo/inference/pytorch/inference.py | 26 ++++++++++++------------- 2 files changed, 19 insertions(+), 38 deletions(-) diff --git a/exo/inference/pytorch/README.md b/exo/inference/pytorch/README.md index 670c8df63..5cbeeef60 100644 --- a/exo/inference/pytorch/README.md +++ b/exo/inference/pytorch/README.md @@ -1,26 +1,9 @@ # PyTorch & HuggingFace inference engine -Experimental, still under development - -## Install -Install needed py modules, make sure to be using CUDA 12.4 for the PyTorch install - -```console -$ pip install torch --index-url https://download.pytorch.org/whl/cu124 -$ pip install transformers accelerate -``` - -After installing accelerate you get hit with a dependency error, for now ignore until we can fix this as exo works fine with 1.26.4 - -```console -ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts. -exo 0.0.1 requires numpy==2.0.0, but you have numpy 1.26.4 which is incompatible. -``` - -## Low VRAM Notes - -- When trying to do disk_offload getting the error "Cannot copy out of meta tensor; no data!", looking up the error it is tied to (low vram)[https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/13087#issuecomment-2080272004] - -## Multiple GPU in 1 Notes -### Running multiple GPUs on 1 machine -- Getting error "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1! (when checking argument for argument tensors in method wrapper_CUDA_cat)" +## Notes/Issues +### 10/10/2024 +- To select a pytorch device via environment variables, set the variable TORCH_DEVICE +- - XLA is currently not installed and will need to be added to inference.py, looking into doing this on a TPU VM +- - With pytorch, CUDA and ROCm are the same so specifying CUDA also enables ROCm support. See this [post](https://github.com/pytorch/pytorch/issues/55223#issuecomment-812587373) +- - Looking into adding mobile device support properly +- If device is not CPU the data type defaults to float32 else float16. diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py index a613015ed..04bda3003 100644 --- a/exo/inference/pytorch/inference.py +++ b/exo/inference/pytorch/inference.py @@ -2,12 +2,14 @@ import asyncio import os import re -import numpy as np -import torch import json import functools from concurrent.futures import ThreadPoolExecutor +import numpy as np + +import torch + from typing import Optional, Tuple, Union, List from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine @@ -49,23 +51,19 @@ def __init__(self, shard_downloader: HFShardDownloader): self.past_input_ids = None # setup cuda device - if os.environ.get("PYTORCH_DEVICE"): - pytorch_device = os.environ["PYTOCH_DEVICE"] - if pytorch_device not in ["cuda", "mps"]: - pytorch_device = "cpu" - - self.device = pytorch_device - self.torch_dtype = torch.float16 if pytorch_device != "cpu" else torch.float32 - - if torch.cuda.is_available(): + if os.environ.get("TORCH_DEVICE"): + self.device = torch.device(os.environ["TORCH_DEVICE"]) + elif torch.cuda.is_available(): self.device = torch.device("cuda") - self.torch_dtype = torch.float32 elif torch.backends.mps.is_available() and torch.backends.mps.is_built(): self.device = torch.device("mps") - self.torch_dtype = torch.float32 else: self.device = torch.device("cpu") - self.torch_dtype = torch.float16 + + torch.set_default_device(self.device) + + # setup cude dtype + self.torch_dtype = torch.float32 if self.device != torch.device('cpu') else torch.float16 # setup threadding torch.set_num_threads(torch.get_num_threads()) From 18d41ebf79ebd288a4fe3e1d571d420d54ff6a80 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 10 Oct 2024 17:46:12 -0800 Subject: [PATCH 403/589] fixing imports --- exo/inference/pytorch/inference.py | 332 +++++++++++++++++++++++++++++ 1 file changed, 332 insertions(+) create mode 100644 exo/inference/pytorch/inference.py diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py new file mode 100644 index 000000000..0684aa13c --- /dev/null +++ b/exo/inference/pytorch/inference.py @@ -0,0 +1,332 @@ +# experimental, based off of tinygrad/inference.py +import asyncio +import os +import re +import json +import functools +from concurrent.futures import ThreadPoolExecutor +from 
typing import Optional, Tuple, Union, List + +import numpy as np +import torch + +from exo.inference.shard import Shard +from exo.inference.inference_engine import InferenceEngine +from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel +from exo.inference.tokenizers import resolve_tokenizer +from exo.helpers import DEBUG +from exo.download.hf.hf_shard_download import HFShardDownloader + +from transformers import AutoTokenizer, Cache +# llama +from transformers.models.llama.modeling_llama import LlamaModel + +# model value options +TOP_K = 20 +TEMP = 0.6 +TOP_P = 0.9 +MAX_LENGTH = 125 +MAX_TIME = 60.0 + +class PyTorchDynamicShardInferenceEngine(InferenceEngine): + """ + PyTorch Dynamic Shard Inference Engine for performing model inference with sharded Pytorch/HF based models. + """ + + def __init__(self, shard_downloader: HFShardDownloader): + """ + Initialize the inference engine. + + Args: + shard_downloader: Model and weights sharding download + """ + self.shard = None + self.shard_downloader = shard_downloader + + # the whole history with new logits need to + # be passed to the model to reach the end token + # even with caching + self.past_input_ids = None + + # setup cuda device + if os.environ.get("TORCH_DEVICE"): + self.device = torch.device(os.environ["TORCH_DEVICE"]) + elif torch.cuda.is_available(): + self.device = torch.device("cuda") + elif torch.backends.mps.is_available() and torch.backends.mps.is_built(): + self.device = torch.device("mps") + else: + self.device = torch.device("cpu") + + torch.set_default_device(self.device) + + # setup cude dtype + self.torch_dtype = torch.float32 if self.device != torch.device('cpu') else torch.float16 + + # setup threadding + torch.set_num_threads(torch.get_num_threads()) + + def infer_caching( + self, + inference_state: Optional[str] = None + ) -> Tuple[Optional[torch.Tensor], Optional[dict]]: + """ + inference caching from inference_state json + """ + # setup cache and cached input_ids + past_iids = None + cached_iids = None + if inference_state is not None: + try: + infer_state = json.loads(inference_state) + except ValueError: + infer_state = None + + if infer_state is not None: + cached_iids = infer_state["cached_iids"] + if cached_iids is not None: + past_iids = None + if len(cached_iids) > 0: + past_iids = torch.tensor(cached_iids["input_ids"]).to(self.device) + cached_iids = {"input_ids": past_iids.tolist()} + + if DEBUG >= 4: + print(f"cached_iids: {cached_iids}") + + return (past_iids, cached_iids) + + async def async_forward( + self, + input_ids: Optional[torch.Tensor] = None, + hidden_states: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None + ) -> Tuple[Optional[torch.Tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.Tensor]]: + + loop = asyncio.get_running_loop() + + forward_partial = functools.partial( + self.stateful_sharded_model.forward, + input_ids=input_ids, + hidden_states=hidden_states, + attention_mask=attention_mask + ) + + with ThreadPoolExecutor() as pool: + result = await loop.run_in_executor(pool, forward_partial) + + return result + + async def async_logit_sample( + self, + logits: torch.Tensor + ) -> torch.Tensor: + + loop = asyncio.get_running_loop() + + sample_partial = functools.partial( + self.stateful_sharded_model.logits_sample, + logits=logits + ) + + with ThreadPoolExecutor() as pool: + result = await loop.run_in_executor(pool, sample_partial) + + return result + + async def infer_prompt( + self, + request_id: str, + shard: Shard, + 
prompt: str, + image_str: Optional[str] = None, + inference_state: Optional[str] = None + ) -> Tuple[np.ndarray, str, bool]: + if DEBUG >= 4: + print("infer_prompt called") + print(f"prompt: {prompt}") + print(f"shard: {shard}") + print(f"inference_state: {inference_state}") + + await self.ensure_shard(shard) + + inputs = self.tokenizer([prompt], return_tensors="pt") + input_ids = inputs.input_ids.to(self.device) + input_attention_mask = inputs.attention_mask.to(self.device) + + # get cache from inference_state + past_iids, cached_iids = self.infer_caching(inference_state) + + if past_iids is not None: + self.past_input_ids = past_iids + else: + self.past_input_ids = input_ids + + if DEBUG >= 4: + print(f"past_input_ids: {self.past_input_ids}\n") + + shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( + input_ids=self.past_input_ids, + attention_mask=input_attention_mask + ) + + if DEBUG >= 4: + print(f"\nshard_hidden_states: {shard_hidden_states}\n") + print(f"\nshard_past_kvs {shard_past_kvs}\n") + print(f"\nshard_logits: {shard_logits}") + + next_token = None + if shard_logits is not None: + next_token = await self.async_logit_sample(shard_logits) + self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) + input_ids = next_token + + if self.past_input_ids is not None: + cached_iids = {"input_ids": self.past_input_ids.tolist()} + + is_finished = False + if next_token is not None: + is_finished = next_token.item() == self.tokenizer.eos_token_id + + if DEBUG >= 4: + print(f"\ninput_ids: {input_ids}") + print(f"\nshard_hidden_states: {shard_hidden_states}\n") + print(f"\nshard_past_kvs {shard_past_kvs}\n") + print(f"\nshard_logits: {shard_logits}") + + return_values = ( + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), + json.dumps({"cached_iids": cached_iids}), + is_finished + ) + + if DEBUG >= 4: + print(f"return_values: {return_values}") + + return return_values + + async def infer_tensor( + self, + request_id: str, + shard: Shard, + input_data: np.ndarray, + inference_state: Optional[str] = None + ) -> Tuple[np.ndarray, str, bool]: + if DEBUG >= 4: + print("infer_tensor called") + print(f"input_data: {input_data}") + print(f"shard: {shard}") + print(f"inference_state: {inference_state}") + + await self.ensure_shard(shard) + + input_ids = torch.tensor(input_data).to(self.device) + + # get cache from inference_state + past_iids, cached_iids = self.infer_caching(inference_state) + + # detect if hidden_states or not + hidden_states = None + self.past_input_ids = None + if input_ids.size()[-1] > 1: + hidden_states = input_ids + self.past_input_ids = past_iids + else: + if past_iids is not None: + self.past_input_ids = past_iids + else: + self.past_input_ids = input_ids + + if DEBUG >= 4: + print(f"past_input_ids: {self.past_input_ids}") + print(f"hidden_state: {hidden_states}") + print(f"inference_state: {inference_state}") + + shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( + input_ids=self.past_input_ids, + hidden_states=hidden_states + ) + + next_token = None + if shard_logits is not None: + next_token = await self.async_logit_sample(shard_logits) + input_ids = next_token + + #cache + next_cached_logits = None + if next_token is not None: + if self.past_input_ids is not None: + next_cached_logits = torch.cat([self.past_input_ids, next_token], dim=-1).to(self.device) + elif past_iids is not None: + next_cached_logits = torch.cat([past_iids, next_token], 
dim=-1).to(self.device) + + cached_iids = { + "input_ids": next_cached_logits.tolist() if next_cached_logits is not None else [] + } + + is_finished = False + if next_token is not None: + is_finished = next_token.item() == self.tokenizer.eos_token_id + + if is_finished: + # clear cache + cached_iids = {"input_ids": []} + + if DEBUG >= 4: + print(f"\ninput_ids: {input_ids}") + print(f"\nshard_hidden_states: {shard_hidden_states}\n") + print(f"\nshard_past_kvs {shard_past_kvs}\n") + print(f"\nshard_logits: {shard_logits}") + + return_values = ( + input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), + json.dumps({"cached_iids": cached_iids}), + is_finished + ) + + if DEBUG >= 4: + print(f"return_values: {return_values}") + + return return_values + + async def ensure_shard(self, shard: Shard): + """ + Ensure the model shard is loaded and ready for inference. + + Args: + shard (Optional[Shard]): Shard information for the model. + """ + if self.shard == shard: + return + + if DEBUG >= 4: + print(f"Loading new shard: {shard}") + + model_path = await self.shard_downloader.ensure_shard(shard) + + self.stateful_sharded_model = ShardedHuggingFaceModel( + shard=shard, + local_model_path=model_path, + device=self.device, + dtype=self.torch_dtype, + top_k=TOP_K, + temp=TEMP, + top_p=TOP_P, + max_length=MAX_LENGTH, + max_time=MAX_TIME + ) + self.shard = shard + + if isinstance(self.stateful_sharded_model.model, LlamaModel): + self.tokenizer = AutoTokenizer.from_pretrained( + model_path if model_path is not None else shard.model_id, + trust_remote_code=True + ) + + if len(re.findall(r"3\.1", shard.model_id)) > 0: + self.tokenizer.add_special_tokens({"pad_token":""}) + + else: + self.tokenizer = await resolve_tokenizer(shard.model_id) + + if DEBUG >= 4: + print(f"Shard loaded successfully: {shard}") From 9ecbf0c0f138950b5adb0839cb8a5e948dc10897 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 10 Oct 2024 17:48:40 -0800 Subject: [PATCH 404/589] fixing chatgpt_api mistake --- exo/api/chatgpt_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py index befc1b43a..7b7be502d 100644 --- a/exo/api/chatgpt_api.py +++ b/exo/api/chatgpt_api.py @@ -125,7 +125,7 @@ def build_prompt(tokenizer, _messages: List[Message]): continue for content in message.content: - # note: wae only support one image at time right now. Multiple is possible. See: https://github.com/huggingface/transformers/blob/e68ec18ce224af879f22d904c7505a765fb77de3/docs/source/en/model_doc/llava.md?plain=1#L41 + # note: we only support one image at a time right now. Multiple is possible. 
See: https://github.com/huggingface/transformers/blob/e68ec18ce224af879f22d904c7505a765fb77de3/docs/source/en/model_doc/llava.md?plain=1#L41 # follows the convention in https://platform.openai.com/docs/guides/vision if isinstance(content, dict) and content.get("type", None) == "image": image_str = content.get("image", None) From dae2cbe6b6b1683deac126b58f66813a5b162cd1 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 10 Oct 2024 17:54:07 -0800 Subject: [PATCH 405/589] removing old pytorch folder --- exo/inference/pytorch/inference.py | 332 ----------------------------- 1 file changed, 332 deletions(-) delete mode 100644 exo/inference/pytorch/inference.py diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py deleted file mode 100644 index 0684aa13c..000000000 --- a/exo/inference/pytorch/inference.py +++ /dev/null @@ -1,332 +0,0 @@ -# experimental, based off of tinygrad/inference.py -import asyncio -import os -import re -import json -import functools -from concurrent.futures import ThreadPoolExecutor -from typing import Optional, Tuple, Union, List - -import numpy as np -import torch - -from exo.inference.shard import Shard -from exo.inference.inference_engine import InferenceEngine -from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel -from exo.inference.tokenizers import resolve_tokenizer -from exo.helpers import DEBUG -from exo.download.hf.hf_shard_download import HFShardDownloader - -from transformers import AutoTokenizer, Cache -# llama -from transformers.models.llama.modeling_llama import LlamaModel - -# model value options -TOP_K = 20 -TEMP = 0.6 -TOP_P = 0.9 -MAX_LENGTH = 125 -MAX_TIME = 60.0 - -class PyTorchDynamicShardInferenceEngine(InferenceEngine): - """ - PyTorch Dynamic Shard Inference Engine for performing model inference with sharded Pytorch/HF based models. - """ - - def __init__(self, shard_downloader: HFShardDownloader): - """ - Initialize the inference engine. 
- - Args: - shard_downloader: Model and weights sharding download - """ - self.shard = None - self.shard_downloader = shard_downloader - - # the whole history with new logits need to - # be passed to the model to reach the end token - # even with caching - self.past_input_ids = None - - # setup cuda device - if os.environ.get("TORCH_DEVICE"): - self.device = torch.device(os.environ["TORCH_DEVICE"]) - elif torch.cuda.is_available(): - self.device = torch.device("cuda") - elif torch.backends.mps.is_available() and torch.backends.mps.is_built(): - self.device = torch.device("mps") - else: - self.device = torch.device("cpu") - - torch.set_default_device(self.device) - - # setup cude dtype - self.torch_dtype = torch.float32 if self.device != torch.device('cpu') else torch.float16 - - # setup threadding - torch.set_num_threads(torch.get_num_threads()) - - def infer_caching( - self, - inference_state: Optional[str] = None - ) -> Tuple[Optional[torch.Tensor], Optional[dict]]: - """ - inference caching from inference_state json - """ - # setup cache and cached input_ids - past_iids = None - cached_iids = None - if inference_state is not None: - try: - infer_state = json.loads(inference_state) - except ValueError: - infer_state = None - - if infer_state is not None: - cached_iids = infer_state["cached_iids"] - if cached_iids is not None: - past_iids = None - if len(cached_iids) > 0: - past_iids = torch.tensor(cached_iids["input_ids"]).to(self.device) - cached_iids = {"input_ids": past_iids.tolist()} - - if DEBUG >= 4: - print(f"cached_iids: {cached_iids}") - - return (past_iids, cached_iids) - - async def async_forward( - self, - input_ids: Optional[torch.Tensor] = None, - hidden_states: Optional[torch.Tensor] = None, - attention_mask: Optional[torch.Tensor] = None - ) -> Tuple[Optional[torch.Tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.Tensor]]: - - loop = asyncio.get_running_loop() - - forward_partial = functools.partial( - self.stateful_sharded_model.forward, - input_ids=input_ids, - hidden_states=hidden_states, - attention_mask=attention_mask - ) - - with ThreadPoolExecutor() as pool: - result = await loop.run_in_executor(pool, forward_partial) - - return result - - async def async_logit_sample( - self, - logits: torch.Tensor - ) -> torch.Tensor: - - loop = asyncio.get_running_loop() - - sample_partial = functools.partial( - self.stateful_sharded_model.logits_sample, - logits=logits - ) - - with ThreadPoolExecutor() as pool: - result = await loop.run_in_executor(pool, sample_partial) - - return result - - async def infer_prompt( - self, - request_id: str, - shard: Shard, - prompt: str, - image_str: Optional[str] = None, - inference_state: Optional[str] = None - ) -> Tuple[np.ndarray, str, bool]: - if DEBUG >= 4: - print("infer_prompt called") - print(f"prompt: {prompt}") - print(f"shard: {shard}") - print(f"inference_state: {inference_state}") - - await self.ensure_shard(shard) - - inputs = self.tokenizer([prompt], return_tensors="pt") - input_ids = inputs.input_ids.to(self.device) - input_attention_mask = inputs.attention_mask.to(self.device) - - # get cache from inference_state - past_iids, cached_iids = self.infer_caching(inference_state) - - if past_iids is not None: - self.past_input_ids = past_iids - else: - self.past_input_ids = input_ids - - if DEBUG >= 4: - print(f"past_input_ids: {self.past_input_ids}\n") - - shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( - input_ids=self.past_input_ids, - attention_mask=input_attention_mask - 
) - - if DEBUG >= 4: - print(f"\nshard_hidden_states: {shard_hidden_states}\n") - print(f"\nshard_past_kvs {shard_past_kvs}\n") - print(f"\nshard_logits: {shard_logits}") - - next_token = None - if shard_logits is not None: - next_token = await self.async_logit_sample(shard_logits) - self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) - input_ids = next_token - - if self.past_input_ids is not None: - cached_iids = {"input_ids": self.past_input_ids.tolist()} - - is_finished = False - if next_token is not None: - is_finished = next_token.item() == self.tokenizer.eos_token_id - - if DEBUG >= 4: - print(f"\ninput_ids: {input_ids}") - print(f"\nshard_hidden_states: {shard_hidden_states}\n") - print(f"\nshard_past_kvs {shard_past_kvs}\n") - print(f"\nshard_logits: {shard_logits}") - - return_values = ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps({"cached_iids": cached_iids}), - is_finished - ) - - if DEBUG >= 4: - print(f"return_values: {return_values}") - - return return_values - - async def infer_tensor( - self, - request_id: str, - shard: Shard, - input_data: np.ndarray, - inference_state: Optional[str] = None - ) -> Tuple[np.ndarray, str, bool]: - if DEBUG >= 4: - print("infer_tensor called") - print(f"input_data: {input_data}") - print(f"shard: {shard}") - print(f"inference_state: {inference_state}") - - await self.ensure_shard(shard) - - input_ids = torch.tensor(input_data).to(self.device) - - # get cache from inference_state - past_iids, cached_iids = self.infer_caching(inference_state) - - # detect if hidden_states or not - hidden_states = None - self.past_input_ids = None - if input_ids.size()[-1] > 1: - hidden_states = input_ids - self.past_input_ids = past_iids - else: - if past_iids is not None: - self.past_input_ids = past_iids - else: - self.past_input_ids = input_ids - - if DEBUG >= 4: - print(f"past_input_ids: {self.past_input_ids}") - print(f"hidden_state: {hidden_states}") - print(f"inference_state: {inference_state}") - - shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( - input_ids=self.past_input_ids, - hidden_states=hidden_states - ) - - next_token = None - if shard_logits is not None: - next_token = await self.async_logit_sample(shard_logits) - input_ids = next_token - - #cache - next_cached_logits = None - if next_token is not None: - if self.past_input_ids is not None: - next_cached_logits = torch.cat([self.past_input_ids, next_token], dim=-1).to(self.device) - elif past_iids is not None: - next_cached_logits = torch.cat([past_iids, next_token], dim=-1).to(self.device) - - cached_iids = { - "input_ids": next_cached_logits.tolist() if next_cached_logits is not None else [] - } - - is_finished = False - if next_token is not None: - is_finished = next_token.item() == self.tokenizer.eos_token_id - - if is_finished: - # clear cache - cached_iids = {"input_ids": []} - - if DEBUG >= 4: - print(f"\ninput_ids: {input_ids}") - print(f"\nshard_hidden_states: {shard_hidden_states}\n") - print(f"\nshard_past_kvs {shard_past_kvs}\n") - print(f"\nshard_logits: {shard_logits}") - - return_values = ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps({"cached_iids": cached_iids}), - is_finished - ) - - if DEBUG >= 4: - print(f"return_values: {return_values}") - - return return_values - - async def ensure_shard(self, shard: Shard): - """ - Ensure the model shard is loaded and ready for 
inference. - - Args: - shard (Optional[Shard]): Shard information for the model. - """ - if self.shard == shard: - return - - if DEBUG >= 4: - print(f"Loading new shard: {shard}") - - model_path = await self.shard_downloader.ensure_shard(shard) - - self.stateful_sharded_model = ShardedHuggingFaceModel( - shard=shard, - local_model_path=model_path, - device=self.device, - dtype=self.torch_dtype, - top_k=TOP_K, - temp=TEMP, - top_p=TOP_P, - max_length=MAX_LENGTH, - max_time=MAX_TIME - ) - self.shard = shard - - if isinstance(self.stateful_sharded_model.model, LlamaModel): - self.tokenizer = AutoTokenizer.from_pretrained( - model_path if model_path is not None else shard.model_id, - trust_remote_code=True - ) - - if len(re.findall(r"3\.1", shard.model_id)) > 0: - self.tokenizer.add_special_tokens({"pad_token":""}) - - else: - self.tokenizer = await resolve_tokenizer(shard.model_id) - - if DEBUG >= 4: - print(f"Shard loaded successfully: {shard}") From 55ae0271d995e073cc7df10b47075289fd607ac0 Mon Sep 17 00:00:00 2001 From: Vincent C Date: Thu, 10 Oct 2024 17:56:33 -0800 Subject: [PATCH 406/589] Update README.md cleaning up readme --- exo/inference/torch/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exo/inference/torch/README.md b/exo/inference/torch/README.md index 5cbeeef60..59e73f7c9 100644 --- a/exo/inference/torch/README.md +++ b/exo/inference/torch/README.md @@ -3,7 +3,7 @@ ## Notes/Issues ### 10/10/2024 - To select a pytorch device via environment variables, set the variable TORCH_DEVICE -- - XLA is currently not installed and will need to be added to inference.py, looking into doing this on a TPU VM -- - With pytorch, CUDA and ROCm are the same so specifying CUDA also enables ROCm support. See this [post](https://github.com/pytorch/pytorch/issues/55223#issuecomment-812587373) -- - Looking into adding mobile device support properly + - XLA is currently not installed and will need to be added to inference.py, looking into doing this on a TPU VM + - With pytorch, CUDA and ROCm are the same so specifying CUDA also enables ROCm support. See this [post](https://github.com/pytorch/pytorch/issues/55223#issuecomment-812587373) + - Looking into adding mobile device support properly - If device is not CPU the data type defaults to float32 else float16. 
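The device and dtype selection described in these README notes can be exercised on its own. The sketch below restates the selection order used in `inference.py` above (TORCH_DEVICE environment variable first, then CUDA/ROCm, then MPS, then CPU); the helper name `pick_device_and_dtype` is illustrative and not part of the codebase.

```python
import os
import torch

def pick_device_and_dtype():
    # explicit override, e.g. `export TORCH_DEVICE=cuda` or `export TORCH_DEVICE=mps`
    if os.environ.get("TORCH_DEVICE"):
        device = torch.device(os.environ["TORCH_DEVICE"])
    elif torch.cuda.is_available():  # CUDA and ROCm builds of PyTorch both report as "cuda"
        device = torch.device("cuda")
    elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")

    # per the README note above: non-CPU devices default to float32, CPU to float16
    dtype = torch.float32 if device.type != "cpu" else torch.float16
    return device, dtype

device, dtype = pick_device_and_dtype()
print(f"using {device} with {dtype}")
```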
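The threadpooling introduced in PATCH 395 above follows a standard asyncio pattern: the blocking forward and sampling calls are bound with `functools.partial` and handed to `loop.run_in_executor` so the event loop stays responsive while the model runs. A minimal, self-contained illustration of that pattern, where `blocking_forward` is a hypothetical stand-in for `stateful_sharded_model.forward`:

```python
import asyncio
import functools
from concurrent.futures import ThreadPoolExecutor

import torch

def blocking_forward(x: torch.Tensor) -> torch.Tensor:
    # stand-in for the sharded model's forward pass: compute-bound and blocking
    return x @ x.T

async def async_forward(x: torch.Tensor) -> torch.Tensor:
    loop = asyncio.get_running_loop()
    # same shape as async_forward/async_logit_sample in inference.py:
    # bind the arguments with functools.partial, then run the call in a thread pool
    with ThreadPoolExecutor() as pool:
        return await loop.run_in_executor(pool, functools.partial(blocking_forward, x))

if __name__ == "__main__":
    result = asyncio.run(async_forward(torch.ones(2, 3)))
    print(result)
```

Creating a fresh `ThreadPoolExecutor` per call, as the patch does, is simple but pays pool setup and teardown on every forward; a long-lived executor (or `loop.run_in_executor(None, ...)` for the default pool) is a common alternative.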
From 4b6a86d8f9bcce40df4177bf188e6c195375af6e Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 11 Oct 2024 16:38:45 -0700 Subject: [PATCH 407/589] set all torch models in models.py --- exo/models.py | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/exo/models.py b/exo/models.py index b6a7092b5..3cefe8892 100644 --- a/exo/models.py +++ b/exo/models.py @@ -4,32 +4,39 @@ ### llama "llama-3.2-1b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-1B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=16), - "TorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), }, "llama-3.2-3b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-3B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Llama-3.2-3B-Instruct", start_layer=0, end_layer=0, n_layers=28), }, "llama-3.1-8b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), "TinygradDynamicShardInferenceEngine": Shard(model_id="mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", start_layer=0, end_layer=0, n_layers=32), - "TorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct", start_layer=0, end_layer=0, n_layers=32), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Meta-Llama-3.1-8B-Instruct", start_layer=0, end_layer=0, n_layers=32), }, "llama-3.1-70b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), "TinygradDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3.1-70B-Instruct", start_layer=0, end_layer=0, n_layers=80), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Meta-Llama-3.1-70B-Instruct", start_layer=0, end_layer=0, n_layers=80), }, "llama-3.1-70b-bf16": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-bf16-CORRECTED", start_layer=0, end_layer=0, n_layers=80), "TinygradDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3.1-70B-Instruct", start_layer=0, end_layer=0, n_layers=80), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Meta-Llama-3.1-70B-Instruct", start_layer=0, end_layer=0, n_layers=80), + }, + "llama-3.1-405b": { + "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-405B-4bit", start_layer=0, end_layer=0, n_layers=126), }, - "llama-3.1-405b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-405B-4bit", start_layer=0, end_layer=0, n_layers=126),}, "llama-3-8b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", start_layer=0, end_layer=0, n_layers=32), + "TorchDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3-8B-Instruct", start_layer=0, end_layer=0, n_layers=32), }, "llama-3-70b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), 
"TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", start_layer=0, end_layer=0, n_layers=80), + "TorchDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3-70B-Instruct", start_layer=0, end_layer=0, n_layers=80), }, "llama-3-2B-Base": { "TorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=6), @@ -38,34 +45,55 @@ "TorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-1B-Base", start_layer=0, end_layer=0, n_layers=3), }, ### mistral - "mistral-nemo": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40),}, - "mistral-large": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Large-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=88),}, + "mistral-nemo": { + "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Mistral-Nemo-Instruct-2407", start_layer=0, end_layer=0, n_layers=40), + }, + "mistral-large": { + "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Large-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=88), + "TorchDynamicShardInferenceEngine": Shard(model_id="mistralai/Mistral-Large-Instruct-2407", start_layer=0, end_layer=0, n_layers=88), + }, ### deepseek - "deepseek-coder-v2-lite": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", start_layer=0, end_layer=0, n_layers=27),}, - "deepseek-coder-v2.5": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/DeepSeek-V2.5-MLX-AQ4_1_64", start_layer=0, end_layer=0, n_layers=60),}, + "deepseek-coder-v2-lite": { + "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", start_layer=0, end_layer=0, n_layers=27), + "TorchDynamicShardInferenceEngine": Shard(model_id="deepseek-ai/DeepSeek-V2-Lite", start_layer=0, end_layer=0, n_layers=27), + }, + "deepseek-coder-v2.5": { + "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/DeepSeek-V2.5-MLX-AQ4_1_64", start_layer=0, end_layer=0, n_layers=60), + "TorchDynamicShardInferenceEngine": Shard(model_id="deepseek-ai/DeepSeek-V2.5", start_layer=0, end_layer=0, n_layers=60), + }, ### llava - "llava-1.5-7b-hf": {"MLXDynamicShardInferenceEngine": Shard(model_id="llava-hf/llava-1.5-7b-hf", start_layer=0, end_layer=0, n_layers=32),}, + "llava-1.5-7b-hf": { + "MLXDynamicShardInferenceEngine": Shard(model_id="llava-hf/llava-1.5-7b-hf", start_layer=0, end_layer=0, n_layers=32), + }, ### qwen "qwen-2.5-coder-1.5b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Qwen2.5-Coder-1.5B-Instruct", start_layer=0, end_layer=0, n_layers=28), }, "qwen-2.5-coder-7b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-7B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Qwen2.5-Coder-7B-Instruct", start_layer=0, end_layer=0, n_layers=28), }, "qwen-2.5-7b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-7B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), + 
"TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Qwen2.5-7B-Instruct", start_layer=0, end_layer=0, n_layers=28), }, "qwen-2.5-math-7b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Math-7B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Qwen2.5-Math-7B-Instruct", start_layer=0, end_layer=0, n_layers=28), }, "qwen-2.5-14b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-14B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=48), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Qwen2.5-14B-Instruct", start_layer=0, end_layer=0, n_layers=48), }, "qwen-2.5-72b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-72B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Qwen2.5-72B-Instruct", start_layer=0, end_layer=0, n_layers=80), }, "qwen-2.5-math-72b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Math-72B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Qwen2.5-Math-72B-Instruct", start_layer=0, end_layer=0, n_layers=80), }, "qwen2-0.5b-instruct": { "TorchDynamicShardInferenceEngine": Shard(model_id="Qwen/Qwen2-0.5B-Instruct", start_layer=0, end_layer=0, n_layers=24), From 830d33d5e17ebdcd41167dc40d8d1c02b104bcca Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 11 Oct 2024 16:39:12 -0700 Subject: [PATCH 408/589] in torch, explicitly set the device when initilaizing the model --- exo/inference/torch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index d6038f043..7e154aeb3 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -59,7 +59,7 @@ def __init__( self.llm_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=self.local_model_path, torch_dtype=self.torch_dtype, - device_map="auto", + device_map={"": self.device}, offload_buffers=True ) From 074dfe3dc965db3838f89ca1d05cb486eddf02ed Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 11 Oct 2024 16:39:16 -0700 Subject: [PATCH 409/589] spacing --- exo/inference/torch/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/inference.py b/exo/inference/torch/inference.py index c5eddabde..bdb71f642 100644 --- a/exo/inference/torch/inference.py +++ b/exo/inference/torch/inference.py @@ -114,7 +114,7 @@ async def async_forward( ) with ThreadPoolExecutor() as pool: - result = await loop.run_in_executor(pool, forward_partial) + result = await loop.run_in_executor(pool, forward_partial) return result From d9cfcc4c20555981ae88188a42270604240777eb Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Fri, 11 Oct 2024 16:53:13 -0700 Subject: [PATCH 410/589] add model mlx-community/Qwen2-0.5B-Instruct-4bit --- exo/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/models.py b/exo/models.py index 3cefe8892..fe608f05e 100644 --- a/exo/models.py +++ b/exo/models.py @@ -96,6 +96,7 @@ "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Qwen2.5-Math-72B-Instruct", start_layer=0, end_layer=0, n_layers=80), }, "qwen2-0.5b-instruct": { + "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2-0.5B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=24), "TorchDynamicShardInferenceEngine": 
Shard(model_id="Qwen/Qwen2-0.5B-Instruct", start_layer=0, end_layer=0, n_layers=24), }, } From 2c056b4fc71e7b57ec0c615364bd35e81f406a66 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 12 Oct 2024 02:56:16 -0800 Subject: [PATCH 411/589] code changes from PR feedback, working on splitting of weights --- exo/inference/torch/inference.py | 123 +++--- exo/inference/torch/model/hf.py | 56 +-- .../torch/tests/test_inference_engine.py | 13 +- .../torch/tests/test_simple_model.py | 13 +- exo/inference/torch/tests/test_split_model.py | 378 ------------------ exo/models.py | 4 +- exo/networking/grpc/grpc_peer_handle.py | 4 +- setup.py | 2 +- 8 files changed, 125 insertions(+), 468 deletions(-) delete mode 100644 exo/inference/torch/tests/test_split_model.py diff --git a/exo/inference/torch/inference.py b/exo/inference/torch/inference.py index c5eddabde..d3f4e8536 100644 --- a/exo/inference/torch/inference.py +++ b/exo/inference/torch/inference.py @@ -1,7 +1,6 @@ # experimental, based off of tinygrad/inference.py import asyncio import os -import re import json import functools from concurrent.futures import ThreadPoolExecutor @@ -17,18 +16,14 @@ from exo.inference.tokenizers import resolve_tokenizer from exo.helpers import DEBUG from exo.download.hf.hf_shard_download import HFShardDownloader +from exo.download.hf.hf_helpers import get_weight_map -from transformers import AutoTokenizer, Cache - -# llama -from transformers.models.llama.modeling_llama import LlamaModel +from transformers import Cache # model value options TOP_K = 20 TEMP = 0.6 TOP_P = 0.9 -MAX_LENGTH = 125 -MAX_TIME = 60.0 class TorchDynamicShardInferenceEngine(InferenceEngine): """ @@ -63,7 +58,7 @@ def __init__(self, shard_downloader: HFShardDownloader): torch.set_default_device(self.device) # setup cude dtype - self.torch_dtype = torch.float32 if self.device != torch.device('cpu') else torch.float16 + self.dtype = torch.get_default_dtype() # setup threadding torch.set_num_threads(torch.get_num_threads()) @@ -103,18 +98,30 @@ async def async_forward( hidden_states: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None ) -> Tuple[Optional[torch.Tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.Tensor]]: + """ + Asynchronously performs the forward pass using a stateful sharded model. - loop = asyncio.get_running_loop() + Args: + input_ids (torch.Tensor, optional): Input token IDs for the model. If not provided, `hidden_states` must be used. + hidden_states (torch.Tensor, optional): Precomputed hidden states to be used instead of `input_ids`. + attention_mask (torch.Tensor, optional): Mask to prevent attention on padding token indices. - forward_partial = functools.partial( - self.stateful_sharded_model.forward, - input_ids=input_ids, - hidden_states=hidden_states, - attention_mask=attention_mask - ) + Returns: + A tuple containing: + + - shard_hidden_states (torch.Tensor, optional): Hidden states resulting from the forward pass. + - shard_past_kvs (list(torch.FloatTensor), optional): List of past key-value tensors (cache) used in the model. + - shard_logits (torch.Tensor, optional): The logits computed during the forward pass. 
+ """ + loop = asyncio.get_running_loop() with ThreadPoolExecutor() as pool: - result = await loop.run_in_executor(pool, forward_partial) + result = await loop.run_in_executor(pool, functools.partial( + self.stateful_sharded_model.forward, + input_ids=input_ids, + hidden_states=hidden_states, + attention_mask=attention_mask + )) return result @@ -122,16 +129,22 @@ async def async_logit_sample( self, logits: torch.Tensor ) -> torch.Tensor: + """ + Asynchronously samples logits using the model's logit sampling method. - loop = asyncio.get_running_loop() + Args: + logits (torch.Tensor): The logits produced by the model for sampling. - sample_partial = functools.partial( - self.stateful_sharded_model.logits_sample, - logits=logits - ) + Returns: + next_logit (torch.Tensor): The next logit samples from given logis + """ + loop = asyncio.get_running_loop() with ThreadPoolExecutor() as pool: - result = await loop.run_in_executor(pool, sample_partial) + result = await loop.run_in_executor(pool, functools.partial( + self.stateful_sharded_model.logits_sample, + logits=logits + )) return result @@ -143,6 +156,23 @@ async def infer_prompt( image_str: Optional[str] = None, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: + """ + Asynchronously processes a prompt using the specified shard and returns the inference result. + + Args: + request_id (str): The unique identifier for the request. + shard (Shard): The model shard used for inference. + prompt (str): The text prompt to be processed by the model. + image_str (str, optional): A base64 encoded image string to be optionally used in the inference. Defaults to None. + inference_state (str, optional): The cached inference state for resuming or continuing inference. Defaults to None. + + Returns: + A tuple containing: + + - input_ids (np.ndarray): The processed token IDs as a NumPy array if logits were generated. Otherwise, it returns hidden states. + - cache_json (str): A JSON string containing the cached input IDs for further inference steps. + - is_finished (bool): A boolean indicating whether the model has reached the end-of-sequence (EOS) token. + """ if DEBUG >= 4: print("infer_prompt called") print(f"prompt: {prompt}") @@ -182,6 +212,9 @@ async def infer_prompt( self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) input_ids = next_token + if DEBUG >= 4: + print(f"\nnext_token: {next_token}") + if self.past_input_ids is not None: cached_iids = {"input_ids": self.past_input_ids.tolist()} @@ -189,12 +222,6 @@ async def infer_prompt( if next_token is not None: is_finished = next_token.item() == self.tokenizer.eos_token_id - if DEBUG >= 4: - print(f"\ninput_ids: {input_ids}") - print(f"\nshard_hidden_states: {shard_hidden_states}\n") - print(f"\nshard_past_kvs {shard_past_kvs}\n") - print(f"\nshard_logits: {shard_logits}") - return_values = ( input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), json.dumps({"cached_iids": cached_iids}), @@ -213,6 +240,22 @@ async def infer_tensor( input_data: np.ndarray, inference_state: Optional[str] = None ) -> Tuple[np.ndarray, str, bool]: + """ + Asynchronously processes input tensor data using the specified shard and returns the inference result. + + Args: + request_id (str): The unique identifier for the request. + shard (Shard): The model shard used for inference. + input_data (np.ndarray): The input data in NumPy array format to be processed by the model. 
+ inference_state (str, optional): The cached inference state for resuming or continuing inference. Defaults to None. + + Returns: + A tuple containing: + + - input_ids (np.ndarray): The processed token IDs as a NumPy array if logits were generated. Otherwise, it returns hidden states. + - cache_json (str): A JSON string containing the cached input IDs for further inference steps. + - is_finished (bool): A boolean indicating whether the model has reached the end-of-sequence (EOS) token. + """ if DEBUG >= 4: print("infer_tensor called") print(f"input_data: {input_data}") @@ -239,9 +282,9 @@ async def infer_tensor( self.past_input_ids = input_ids if DEBUG >= 4: - print(f"past_input_ids: {self.past_input_ids}") - print(f"hidden_state: {hidden_states}") - print(f"inference_state: {inference_state}") + print(f"\npast_input_ids: {self.past_input_ids}") + print(f"\nhidden_state: {hidden_states}") + print(f"\ninference_state: {inference_state}") shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( input_ids=self.past_input_ids, @@ -309,26 +352,14 @@ async def ensure_shard(self, shard: Shard): shard=shard, local_model_path=model_path, device=self.device, - dtype=self.torch_dtype, + dtype=self.dtype, top_k=TOP_K, temp=TEMP, - top_p=TOP_P, - max_length=MAX_LENGTH, - max_time=MAX_TIME + top_p=TOP_P ) self.shard = shard - if isinstance(self.stateful_sharded_model.model, LlamaModel): - self.tokenizer = AutoTokenizer.from_pretrained( - model_path if model_path is not None else shard.model_id, - trust_remote_code=True - ) - - if len(re.findall(r"3\.1", shard.model_id)) > 0: - self.tokenizer.add_special_tokens({"pad_token":""}) - - else: - self.tokenizer = await resolve_tokenizer(shard.model_id) + self.tokenizer = await resolve_tokenizer(shard.model_id) if DEBUG >= 4: print(f"Shard loaded successfully: {shard}") diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index d6038f043..254c1dd3e 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -27,10 +27,21 @@ def __init__( dtype, top_k: int = 25, temp: float = 0.7, - top_p: float = 0.9, - max_length: int = 50, - max_time: float = 10.0 + top_p: float = 0.9 ): + """ + Initializes the ShardedHuggingFaceModel with a specified shard, model path, and device. + + Args: + shard (Shard): The model shard containing the start and end layers. + local_model_path (str): The local path to the model. + device (str): The device on which to run the model, e.g., "cuda" or "cpu". + dtype (torch.dtype): The data type (precision) to be used for model computations. + top_k (int, optional): The number of top tokens to consider for sampling. Defaults to 25. + temp (float, optional): The temperature for softmax sampling. Defaults to 0.7. + top_p (float, optional): The cumulative probability threshold for nucleus sampling. Defaults to 0.9. 
+ """ + # class vars self.shard = shard self.hidden_states = None @@ -52,14 +63,14 @@ def __init__( ]) self.device = device - self.torch_dtype = dtype + self.dtype = dtype # setup pytorch and transformer llm try: self.llm_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=self.local_model_path, - torch_dtype=self.torch_dtype, - device_map="auto", + torch_dtype=self.dtype, + device_map={"", self.device}, offload_buffers=True ) @@ -77,25 +88,20 @@ def forward( use_legacy_cache: bool = False ) -> Tuple[Optional[torch.Tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.Tensor]]: """ - Generate hidden states or logits via passing through set amount of layers of a model - To be passed only input_ids OR hidden_state and not both. This is for connecting the model - layer to generate a complete output + Performs a forward pass through the model shard, computing hidden states, past key values, and logits. Args: - model: base llm model tramsformers class - llm_model: llm chat model class - input_ids: tensor optional - attention_mask: tensor optional - past_key_values: Cache or list[tensor] optional - use_legacy_cache: bool optional - infer_tensor: bool optional, lets forward know to handle tensors + input_ids (torch.Tensor, optional): The input token IDs for the model. Either input_ids or hidden_states must be provided. + hidden_states (torch.Tensor, optional): The hidden states of the model at the current layer. + attention_mask (torch.Tensor, optional): The attention mask to prevent attending to padding tokens. + past_key_values (Union[Cache, List[torch.FloatTensor]], optional): Cached past key values for fast autoregressive generation. + use_legacy_cache (bool, optional): Whether to use the legacy cache format for past key values. Defaults to False. Returns: - Tuple of - - hidden_states: tensor optional - - past_key_values: Cache or list[tensor] optional - - logits: tensor Optional - + Tuple: + - hidden_states (torch.Tensor, optional): The hidden states after the forward pass. + - past_key_values (Union[Cache, List[torch.FloatTensor]], optional): The updated past key values. + - logits (torch.Tensor, optional): The logits produced by the model if the last layer is processed. """ model_inputs = None self.hidden_states = hidden_states @@ -246,14 +252,14 @@ def logits_sample( use_max: Optional[bool] = False ) -> torch.Tensor: """ - Get a sample of the logits from end of model run for next token + Samples the next token from the model's output logits, either by using argmax or probabilistic sampling. Args: - logits: tensor - use_max: bool, if function should sample with argmax + logits (torch.Tensor): The logits output from the model's final layer. + use_max (bool, optional): If True, uses torch.argmax to select the next token from logits. Defaults to False. Returns: - next_token: tensor + torch.Tensor: The next predicted token. 
""" # get a single cloned logit diff --git a/exo/inference/torch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_inference_engine.py index e8b0b14c3..b326a0b68 100644 --- a/exo/inference/torch/tests/test_inference_engine.py +++ b/exo/inference/torch/tests/test_inference_engine.py @@ -1,12 +1,12 @@ +""" +Test inference engine and model sharding +""" import asyncio from exo.inference.shard import Shard from exo.inference.torch.inference import TorchDynamicShardInferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.inference_engine import InferenceEngine -from exo.inference.shard import Shard -from exo.helpers import DEBUG -import os import numpy as np import time @@ -15,8 +15,7 @@ async def test_inference_engine( inference_engine_2: InferenceEngine, model_id: str, n_layers: int): - - # prompt = "Why is the sky blue?" + prompt = "In a single word only, what is the last name of the current president of the USA?" shard = Shard( @@ -129,11 +128,11 @@ async def test_inference_engine( # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") try: - print("\n-------- Test meta-llama/Llama-3.2-1B-Instruct ----------\n") + print("\n-------- Test unsloth/Llama-3.2-1B-Instruct ----------\n") asyncio.run(test_inference_engine( TorchDynamicShardInferenceEngine(HFShardDownloader()), TorchDynamicShardInferenceEngine(HFShardDownloader()), - "meta-llama/Llama-3.2-1B-Instruct", + "unsloth/Llama-3.2-1B-Instruct", 24 )) except Exception as err: diff --git a/exo/inference/torch/tests/test_simple_model.py b/exo/inference/torch/tests/test_simple_model.py index 1b08a1801..2a36717f7 100644 --- a/exo/inference/torch/tests/test_simple_model.py +++ b/exo/inference/torch/tests/test_simple_model.py @@ -1,5 +1,8 @@ +""" +Simple model test using basic pytorch/huggingface LLM model loading, inference and generation +with logit sampling +""" from transformers import AutoModelForCausalLM, AutoTokenizer -device = "cuda" # the device to load the model onto model = AutoModelForCausalLM.from_pretrained( "Qwen/Qwen2-0.5B-Instruct", @@ -19,7 +22,7 @@ tokenize=False, add_generation_prompt=True ) -model_inputs = tokenizer([text], return_tensors="pt").to(device) +model_inputs = tokenizer([text], return_tensors="pt") print(f"model_inputs:\n{model_inputs}") @@ -29,11 +32,9 @@ model_inputs.input_ids, attention_mask=model_inputs.attention_mask, max_new_tokens=512, - do_sample=True, - #top_k=20, - #num_beams=5, - #early_stopping=True + do_sample=True ) + generated_ids = [ output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids) ] diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py deleted file mode 100644 index 157a215d1..000000000 --- a/exo/inference/torch/tests/test_split_model.py +++ /dev/null @@ -1,378 +0,0 @@ -import torch -import torch.nn as nn -import asyncio -import gc -from transformers import ( - AutoModelForCausalLM, - AutoTokenizer, - DynamicCache, - Cache, - LogitsProcessorList, - TopKLogitsWarper, - TopPLogitsWarper, - TemperatureLogitsWarper, - StoppingCriteriaList, - MaxLengthCriteria, - MaxTimeCriteria -) - -from transformers.generation.configuration_utils import ( - GenerationConfig, - GenerationMode -) - -# llama -from transformers.models.llama.modeling_llama import LlamaModel - -# qwen2 -from transformers.models.qwen2.modeling_qwen2 import Qwen2Model - -from exo.api.chatgpt_api import resolve_tokenizer -from typing import Tuple, Optional, Union, List -import re - 
-TEMP = 0.6 -TOP_K = 60 - -class OnionHuggingFaceLM(): - def __init__(self, layers, is_last=False): - self.layers = layers - self.is_last = is_last - self.past_key_values = None - self.cache_position = None - self.position_ids = None - self.input_embed = None - self.causal_mask = None - self.position_embeddings = None - self.attention_mask = None - self.input_ids = None - self.hidden_states = None - self.next_decoder_cache = None - - def forward( - self, - model, - llm_model, - input_ids: Optional[torch.tensor] = None, - hidden_states: Optional[torch.tensor] = None, - attention_mask: Optional[torch.tensor] = None, - past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, - **kwargs - ) -> Tuple[Optional[torch.tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.tensor]]: - - """ - Generate hidden states or logits via passing through set amount of layers of a model - To be passed only input_ids OR hidden_state and not both. This is for connecting the model - layer to generate a complete output - - Args: - model: base llm model tramsformers class - llm_model: llm chat model class - input_ids: tensor Optional - hidden_states: tensor Optional - - Returns: - Tuple of - - hidden_states: tensor Optional - - past_key_values - - logits: tensor Optional - - """ - output_attentions = False # outputting attention not needed - use_legacy_cache = False # some models still use legacy kv store - - if input_ids is not None and hidden_states is not None: - raise ValueError - - if hidden_states is not None: - self.hidden_states = hidden_states - - if input_ids is not None: - self.input_ids = input_ids - - # embed input_ids - self.inputs_embeds = model.embed_tokens(self.input_ids) - - # cache - if past_key_values and not isinstance(past_key_values, Cache): - print("Using legacy cache") - use_legacy_cache = True - past_key_values = DynamicCache.from_legacy_cache(past_key_values) - - past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + self.inputs_embeds.shape[1], - device=self.inputs_embeds.device - ) - - # position id - position_ids = cache_position.unsqueeze(0) - - # causal mask - self.attention_mask = attention_mask - self.causal_mask = model._update_causal_mask( - None, - self.inputs_embeds, - cache_position, - past_key_values, - output_attentions - ) - - #print(f"causal_mask.dim(): {self.causal_mask.dim()}") - - print(f"\ncausal_mask:{self.causal_mask}\n\n") - - # embed positions, some models require and some dont - if isinstance(model, LlamaModel): - self.position_embeddings = model.rotary_emb( - self.inputs_embeds, - position_ids - ) - - model_inputs = llm_model.prepare_inputs_for_generation( - self.input_ids, - past_key_values=past_key_values, - attention_mask=self.attention_mask, - inputs_embeds=self.inputs_embeds, - position_ids=position_ids, - cache_position=cache_position - ) - - print(f"model_inputs\n{model_inputs}") - - self.hidden_states = self.inputs_embeds - self.position_ids = model_inputs["position_ids"] - self.cache_position = model_inputs["cache_position"] - self.past_key_values = model_inputs["past_key_values"] - - - for decoder_layer in self.layers: - layer_outputs = decoder_layer( - self.hidden_states, - attention_mask=self.causal_mask, - position_ids=self.position_ids, - past_key_values=self.past_key_values, - use_cache=True, - cache_position=self.cache_position - - ) - - self.hidden_states = layer_outputs[0] - self.next_decoder_cache = 
layer_outputs[1] - - if self.is_last: - self.hidden_states = model.norm(self.hidden_states) - - if use_legacy_cache: - self.past_key_values = self.next_decoder_cache.to_legacy_cache() - else: - self.past_key_values = self.next_decoder_cache - - # lm_head - logits = llm_model.lm_head(self.hidden_states).to("cuda") - - return ( - None, - None, - logits - ) - - return ( - self.hidden_states, - self.past_key_values, - None - ) - -async def model_half_split_test(prompt: str, model_id: str, layers: int): - """ - Test for splitting in half - """ - - half_layers = int(layers / 2) - - # inference - tokenizer = AutoTokenizer.from_pretrained(model_id) - max_length = 512 #tokenizer.model_max_length - - # get llm model - llm_model = AutoModelForCausalLM.from_pretrained( - model_id, - torch_dtype="auto", - device_map="auto", - use_cache=True - ) - - # get base model - model = llm_model.model - - # add pad token if none, depending on model - if tokenizer.pad_token == None: - if re.match(r"Llama|llama", model_id): - tokenizer.add_special_tokens({"pad_token":""}) - model.resize_token_embeddings(len(tokenizer)) - - - # generate input_ids - messages = [{"role": "user", "content": prompt}] - txt = tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) - - inputs = tokenizer([txt], return_tensors="pt") - input_ids = inputs.input_ids.to("cuda") - input_attention_mask = inputs.attention_mask.to("cuda") - batch_size, seq_length = input_ids.shape[:2] - - is_finished = False - unfinished_sequences = torch.ones(batch_size, dtype=torch.long, device=input_ids.device) - logit_runs = 1 - - raw_logits = None - - while not is_finished: - print(f"\n\nLOGIT RUN {logit_runs}\n\n") - - print(f"input_ids:\n{input_ids}\n") - print(input_ids.shape) - - print("\n first half of layers") - shard_layers = nn.ModuleList(model.layers[:half_layers])#.to("cuda") - #shard_layers = nn.ModuleList(model.layers) - sharded_model = OnionHuggingFaceLM(layers=shard_layers) - #sharded_model.is_last = True - - # generate first half - # add if first layer of model check - shard_hidden_states, shard_past_kvs, shard_logits = sharded_model.forward( - model=model, - llm_model=llm_model, - attention_mask=input_attention_mask, - input_ids=input_ids, - hidden_states=None - ) - - # second half - print(f"\n second half of layers") - sharded_model.layers = nn.ModuleList(model.layers[half_layers:]) - sharded_model.is_last = True - - shard_hidden_states, shard_past_kvs, shard_logits = sharded_model.forward( - model=model, - llm_model=llm_model, - hidden_states=shard_hidden_states, - past_key_values=shard_past_kvs - ) - - # this part of the generation and _sample functions for transformers GenerationMixin - # ref: https://github.com/huggingface/transformers/blob/0a55d9f7376f72ad3ff296d4249840021b03bcc4/src/transformers/generation/utils.py#L1301 - - # clone logit sample - logits = shard_logits[:, -1, :].clone().float() - - raw_logits = logits - - # distribute - logits_processor = LogitsProcessorList([ - TopKLogitsWarper(35), - TemperatureLogitsWarper(0.6), - TopPLogitsWarper(0.8) - ]) - - stopping_critera = StoppingCriteriaList( - [ - MaxLengthCriteria(max_length=255), - MaxTimeCriteria(max_time=100.0), - ] - ) - - next_token_scores = logits_processor(input_ids, logits) - - probs = nn.functional.softmax(next_token_scores, dim=-1) - next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1) - #next_tokens = torch.argmax(next_token_scores, dim=-1) - - # get inputs ready incase not finished - input_ids = 
torch.cat([input_ids, next_tokens[:, None]], dim=-1) - - unfinished_sequences = unfinished_sequences & ~stopping_critera(input_ids, None) - is_finished = unfinished_sequences.max() == 0 - - print(f"is_finished?:\n{is_finished}\n") - - logit_runs += 1 - - del logits - del shard_logits - - print(f"model.generation_config\n{llm_model.generation_config}") - - generated_text = tokenizer.batch_decode( - input_ids, - skip_special_tokens=True, - clean_up_tokenization_spaces=False - )[0] - - print(f"generated_text:\n{generated_text}\n") - - # free model from memory - del model - gc.collect() - torch.cuda.empty_cache() - - -if __name__ == "__main__": - #prompt = "In a single word only, what is the last name of the current president of the USA?" - prompt = "What color is the sky? Explain why" - #prompt = "In a single word only, what is the color of an apple?" - - #print("\n-------- Test TinyLlama/TinyLlama_v1.1 ----------\n") - #model_id = "TinyLlama/TinyLlama_v1.1" - #model_layers = 22 - - #asyncio.run( - # model_half_split_test( - # prompt=prompt, - # model_id=model_id, - # layers=model_layers - # ) - #) - - #print("\n-------- Test meta-llama/Meta-Llama-3.1-8B ----------\n") - #model_id = "meta-llama/Meta-Llama-3.1-8B" - #model_layers = 32 - - #asyncio.run( - # model_half_split_test( - # prompt=prompt, - # model_id=model_id, - # layers=model_layers - # ) - #) - - #print("\n-------- Test Qwen/Qwen2-0.5B-Instruct ----------\n") - #model_id = "Qwen/Qwen2-0.5B-Instruct" - #model_layers = 24 - - #asyncio.run( - # model_half_split_test( - # prompt=prompt, - # model_id=model_id, - # layers=model_layers - # ) - #) - - print("\n-------- Test meta-llama/Llama-3.2-1B-Instruct ----------\n") - model_id = "meta-llama/Llama-3.2-1B-Instruct" - model_layers = 32 - - asyncio.run( - model_half_split_test( - prompt=prompt, - model_id=model_id, - layers=model_layers - ) - ) - diff --git a/exo/models.py b/exo/models.py index b6a7092b5..d5d69164c 100644 --- a/exo/models.py +++ b/exo/models.py @@ -4,7 +4,7 @@ ### llama "llama-3.2-1b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-1B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=16), - "TorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), }, "llama-3.2-3b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-3B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), @@ -12,7 +12,7 @@ "llama-3.1-8b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), "TinygradDynamicShardInferenceEngine": Shard(model_id="mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", start_layer=0, end_layer=0, n_layers=32), - "TorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct", start_layer=0, end_layer=0, n_layers=32), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Meta-Llama-3.1-8B-Instruct", start_layer=0, end_layer=0, n_layers=32), }, "llama-3.1-70b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), diff --git a/exo/networking/grpc/grpc_peer_handle.py b/exo/networking/grpc/grpc_peer_handle.py index 6e8e586ed..14a01f7da 100644 --- a/exo/networking/grpc/grpc_peer_handle.py +++ 
b/exo/networking/grpc/grpc_peer_handle.py @@ -12,8 +12,6 @@ from exo.topology.device_capabilities import DeviceCapabilities from exo.helpers import DEBUG -from exo.helpers import DEBUG - class GRPCPeerHandle(PeerHandle): def __init__(self, _id: str, address: str, device_capabilities: DeviceCapabilities): self._id = _id @@ -78,7 +76,6 @@ async def send_prompt(self, shard: Shard, prompt: str, image_str: Optional[str] inference_state=inference_state, ) - print(f"request: {request}") response = await self.stub.SendPrompt(request) if not response.tensor_data or not response.shape or not response.dtype: @@ -98,6 +95,7 @@ async def send_tensor(self, shard: Shard, tensor: np.ndarray, request_id: Option request_id=request_id, inference_state=inference_state, ) + response = await self.stub.SendTensor(request) if not response.tensor_data or not response.shape or not response.dtype: diff --git a/setup.py b/setup.py index 432a1c3de..5a4e04bcc 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ "uuid==1.30", "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@232edcfd4f8b388807c64fb1817a7668ce27cbad", "torch==2.4.0", - "accelerate" + "accelerate==0.34.2" ] # Add macOS-specific packages if on Darwin (macOS) From 83a723b9e6b156d54a9906f655ffe8d041062108 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 12 Oct 2024 21:09:06 -0800 Subject: [PATCH 412/589] doing more work toward individual safetensor loading, adding back device mapping auto --- exo/inference/torch/inference.py | 5 ++ exo/inference/torch/model/hf.py | 2 +- .../torch/tests/test_inference_engine.py | 18 ++++- exo/inference/torch/tests/test_split_model.py | 71 +++++++++++++++++++ 4 files changed, 92 insertions(+), 4 deletions(-) create mode 100644 exo/inference/torch/tests/test_split_model.py diff --git a/exo/inference/torch/inference.py b/exo/inference/torch/inference.py index d3f4e8536..52db0c0ae 100644 --- a/exo/inference/torch/inference.py +++ b/exo/inference/torch/inference.py @@ -348,6 +348,11 @@ async def ensure_shard(self, shard: Shard): model_path = await self.shard_downloader.ensure_shard(shard) + # get model weight map + model_wm = await get_weight_map(repo_id=shard.model_id) + + print(f"model_wm: {model_wm}") + self.stateful_sharded_model = ShardedHuggingFaceModel( shard=shard, local_model_path=model_path, diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 254c1dd3e..9d524de00 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -70,7 +70,7 @@ def __init__( self.llm_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=self.local_model_path, torch_dtype=self.dtype, - device_map={"", self.device}, + device_map="auto", offload_buffers=True ) diff --git a/exo/inference/torch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_inference_engine.py index b326a0b68..a03c5c9a8 100644 --- a/exo/inference/torch/tests/test_inference_engine.py +++ b/exo/inference/torch/tests/test_inference_engine.py @@ -127,14 +127,26 @@ async def test_inference_engine( # except Exception as err: # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") + #try: + # print("\n-------- Test unsloth/Llama-3.2-1B-Instruct ----------\n") + # asyncio.run(test_inference_engine( + # TorchDynamicShardInferenceEngine(HFShardDownloader()), + # TorchDynamicShardInferenceEngine(HFShardDownloader()), + # "unsloth/Llama-3.2-1B-Instruct", + # 24 + # )) + #except Exception as err: + # print(f"\n\n !!!!!!!!!!! 
meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") + try: - print("\n-------- Test unsloth/Llama-3.2-1B-Instruct ----------\n") + print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") asyncio.run(test_inference_engine( TorchDynamicShardInferenceEngine(HFShardDownloader()), TorchDynamicShardInferenceEngine(HFShardDownloader()), - "unsloth/Llama-3.2-1B-Instruct", - 24 + "unsloth/Meta-Llama-3.1-8B-Instruct", + 32 )) except Exception as err: print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") + diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py new file mode 100644 index 000000000..95f0694db --- /dev/null +++ b/exo/inference/torch/tests/test_split_model.py @@ -0,0 +1,71 @@ +""" +Testing of loading model by layer +""" +import asyncio +import re + +from exo.download.hf.hf_helpers import get_weight_map +from exo.download.hf.hf_shard_download import HFShardDownloader +from exo.inference.shard import Shard + +from typing import Optional, Union, Tuple + +from transformers import AutoModel + +async def load_model( + repo_id: str, + shard: Shard +) -> Optional[AutoModel]: + """ + load model by layer and safetensors + """ + + shard_downloader = HFShardDownloader() + model_path = await shard_downloader.ensure_shard(shard) + weight_map = await get_weight_map(repo_id) + + if weight_map: + for wname, wtensor in weight_map.items(): + # get layer number + layer_rgx = r'^model\.layers\.(\d+)\.(\w+)\.(\w+)$' + layer_found = re.findall(layer_rgx, wname) + if layer_found: + try: + layer_idx = int(layer_found[0][0]) + print(f"layer_idx: {layer_idx}") + if shard.start_layer <= layer_idx <= shard.end_layer: + print(f"wtensor: {wtensor}") + + # move to local .tmp folder that can be removed later + # check if files not already there, if there, reuse + # create automodel with rest of layers + # lm_head needed at end + except Exception as err: + print(f"err: {err}") + +async def test_split_model(model_id: str, n_layers: int): + """ + Test to load split models + """ + + shard = Shard( + model_id=model_id, + start_layer=0, + end_layer=n_layers-1, + n_layers=n_layers + ) + + await load_model( + model_id, + shard + ) + +if __name__ == "__main__": + try: + print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") + asyncio.run(test_split_model( + "unsloth/Meta-Llama-3.1-8B-Instruct", + 32 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! 
meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") From 47be250dbe743d3624f3ae2195ed50e9a2704373 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 13 Oct 2024 04:58:25 -0800 Subject: [PATCH 413/589] working on split model, moving to server for more vram --- exo/inference/torch/tests/test_split_model.py | 145 ++++++++++++++---- 1 file changed, 113 insertions(+), 32 deletions(-) diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index 95f0694db..df0d5497a 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -3,69 +3,150 @@ """ import asyncio import re +import json +import os +from pathlib import Path +from typing import Optional -from exo.download.hf.hf_helpers import get_weight_map +from exo.download.hf.hf_helpers import ( + get_weight_map, + download_repo_files +) from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.shard import Shard -from typing import Optional, Union, Tuple - -from transformers import AutoModel +from transformers import AutoModelForCausalLM async def load_model( repo_id: str, - shard: Shard -) -> Optional[AutoModel]: + shard: Shard, + model_path: Path, + weight_map: Optional[dict] +) -> Optional[AutoModelForCausalLM]: """ load model by layer and safetensors + return causal llm automodel with only requested layers, if weight maps + if no weight map, return and load the whole model """ - - shard_downloader = HFShardDownloader() - model_path = await shard_downloader.ensure_shard(shard) - weight_map = await get_weight_map(repo_id) - + print("load_model called") if weight_map: + layer_weight_map = {} + skip_layers = [] + for wname, wtensor in weight_map.items(): # get layer number - layer_rgx = r'^model\.layers\.(\d+)\.(\w+)\.(\w+)$' + layer_rgx = r'^model\.layers\.(\d+)\.*' layer_found = re.findall(layer_rgx, wname) + print(f"wname: {wname}") if layer_found: - try: - layer_idx = int(layer_found[0][0]) - print(f"layer_idx: {layer_idx}") - if shard.start_layer <= layer_idx <= shard.end_layer: - print(f"wtensor: {wtensor}") - - # move to local .tmp folder that can be removed later - # check if files not already there, if there, reuse - # create automodel with rest of layers - # lm_head needed at end - except Exception as err: - print(f"err: {err}") - -async def test_split_model(model_id: str, n_layers: int): + print(f"layer_found: {layer_found}") + # slice up layer map to start and end layers + # from shard + layer_idx = int(layer_found[0]) + if shard.start_layer <= layer_idx <= shard.end_layer: + layer_weight_map[wname] = wtensor + else: + skip_layers.append(wname) + print(f"SKIPPING LAYER {layer_idx}") + + if wname not in skip_layers: + print(f"adding non-layer: {wname}") + layer_weight_map[wname] = wtensor + + # will manipulate current model.safetensors.index.json + # but set back at end of inference + print(layer_weight_map) + + # rewrite model.safetensors.index.json + try: + model_st_snapshot = model_path/"model.safetensors.index.json" + # call download repo files again to reload original safetensors json + os.remove(model_st_snapshot) + + await download_repo_files( + repo_id=shard.model_id, + revision="main", + allow_patterns="model.safetensors.index.json") + + mst_json = {} + with open(model_st_snapshot, "r") as mst_file: + mst_json = json.load(mst_file) + + mst_json["weight_map"] = layer_weight_map + + print(f"mst_json: {json.dumps(mst_json, indent=4)}") + + with open(model_st_snapshot, "w") as mst_file: + 
json.dump(mst_json, mst_file, indent=4) + print(f"{model_st_snapshot} rewritten with {shard.n_layers} weights") + except Exception as err: + print(f"err: {err}") + raise + + else: + print("weight_map not found, loading whole model") + + # load model with layer edits + # or whole model if no weight_map + shard_model = AutoModelForCausalLM.from_pretrained( + model_path, + device_map="auto", + offload_buffers=True + ) + + return shard_model + + +async def test_split_model( + model_id: str, + start_layer: int, + end_layer: int, + n_layers: int +): """ Test to load split models """ shard = Shard( model_id=model_id, - start_layer=0, - end_layer=n_layers-1, + start_layer=start_layer, + end_layer=end_layer-1, n_layers=n_layers ) + print(f"loading shard: {shard}") + shard_downloader = HFShardDownloader() + model_path = await shard_downloader.ensure_shard(shard) + weight_map = await get_weight_map(model_id) + await load_model( model_id, - shard + shard, + model_path, + weight_map ) if __name__ == "__main__": + #Qwen/Qwen2.5-3B try: - print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") + print("\n-------- Test Qwen/Qwen2.5-3B ----------\n") asyncio.run(test_split_model( - "unsloth/Meta-Llama-3.1-8B-Instruct", - 32 + "Qwen/Qwen2.5-3B", + 0, + 1, + 36 )) except Exception as err: print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") + + # unsloth/Meta-Llama-3.1-8B-Instruct + #try: + # print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") + # asyncio.run(test_split_model( + # "unsloth/Meta-Llama-3.1-8B-Instruct", + # 0, + # 1, + # 32 + # )) + #except Exception as err: + # print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") From ea0d4b154e827aedd05c97afbeb595451d88a5b8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 13 Oct 2024 05:59:26 -0800 Subject: [PATCH 414/589] change to hf downloader as was not getting all safetensor files --- exo/download/hf/hf_helpers.py | 34 ++++++++++++------- exo/inference/torch/tests/test_split_model.py | 33 +++++++++++------- 2 files changed, 42 insertions(+), 25 deletions(-) diff --git a/exo/download/hf/hf_helpers.py b/exo/download/hf/hf_helpers.py index a548df2ee..4c22763db 100644 --- a/exo/download/hf/hf_helpers.py +++ b/exo/download/hf/hf_helpers.py @@ -394,19 +394,27 @@ def extract_layer_num(tensor_name: str) -> Optional[int]: def get_allow_patterns(weight_map: Dict[str, str], shard: Shard) -> List[str]: - default_patterns = set(["*.json","*.py","tokenizer.model","*.tiktoken","*.txt"]) + default_patterns = set([ + "*.json", + "*.py", + "tokenizer.model", + "*.tiktoken", + "*.txt", + "*.safetensors" + ]) + shard_specific_patterns = set() - if weight_map: - for tensor_name, filename in weight_map.items(): - layer_num = extract_layer_num(tensor_name) - if layer_num is not None and shard.start_layer <= layer_num <= shard.end_layer: - shard_specific_patterns.add(filename) - sorted_file_names = sorted(weight_map.values()) - if shard.is_first_layer(): - shard_specific_patterns.add(sorted_file_names[0]) - elif shard.is_last_layer(): - shard_specific_patterns.add(sorted_file_names[-1]) - else: - shard_specific_patterns = set("*.safetensors") + #if weight_map: + # for tensor_name, filename in weight_map.items(): + # layer_num = extract_layer_num(tensor_name) + # if layer_num is not None and shard.start_layer <= layer_num <= shard.end_layer: + # shard_specific_patterns.add(filename) + # sorted_file_names = sorted(weight_map.values()) + # if shard.is_first_layer(): + # 
shard_specific_patterns.add(sorted_file_names[0]) + # elif shard.is_last_layer(): + # shard_specific_patterns.add(sorted_file_names[-1]) + #else: + #shard_specific_patterns = set("*.safetensors") if DEBUG >= 2: print(f"get_allow_patterns {weight_map=} {shard=} {shard_specific_patterns=}") return list(default_patterns | shard_specific_patterns) diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index df0d5497a..46a3b6a4b 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -29,6 +29,8 @@ async def load_model( if no weight map, return and load the whole model """ print("load_model called") + model_st_snapshot = model_path/"model.safetensors.index.json" + if weight_map: layer_weight_map = {} skip_layers = [] @@ -59,14 +61,13 @@ async def load_model( # rewrite model.safetensors.index.json try: - model_st_snapshot = model_path/"model.safetensors.index.json" # call download repo files again to reload original safetensors json - os.remove(model_st_snapshot) + #os.remove(model_st_snapshot) - await download_repo_files( - repo_id=shard.model_id, - revision="main", - allow_patterns="model.safetensors.index.json") + #await download_repo_files( + # repo_id=shard.model_id, + # revision="main", + # allow_patterns="model.safetensors.index.json") mst_json = {} with open(model_st_snapshot, "r") as mst_file: @@ -76,9 +77,11 @@ async def load_model( print(f"mst_json: {json.dumps(mst_json, indent=4)}") - with open(model_st_snapshot, "w") as mst_file: - json.dump(mst_json, mst_file, indent=4) - print(f"{model_st_snapshot} rewritten with {shard.n_layers} weights") + os.remove(model_st_snapshot) + + with open(model_st_snapshot, "w") as mst_file: + json.dump(mst_json, mst_file, indent=4) + print(f"{model_st_snapshot} rewritten with {shard.n_layers} weights") except Exception as err: print(f"err: {err}") raise @@ -94,6 +97,9 @@ async def load_model( offload_buffers=True ) + # have to clear out edited model safetensors mst_json + os.remove(model_st_snapshot) + return shard_model @@ -114,6 +120,9 @@ async def test_split_model( n_layers=n_layers ) + # remove old weight json if present + + print(f"loading shard: {shard}") shard_downloader = HFShardDownloader() model_path = await shard_downloader.ensure_shard(shard) @@ -129,11 +138,11 @@ async def test_split_model( if __name__ == "__main__": #Qwen/Qwen2.5-3B try: - print("\n-------- Test Qwen/Qwen2.5-3B ----------\n") + print("\n-------- Test Qwen/Qwen2.5-3B-Instruct ----------\n") asyncio.run(test_split_model( - "Qwen/Qwen2.5-3B", + "Qwen/Qwen2.5-3B-Instruct", 0, - 1, + 18, 36 )) except Exception as err: From 30b799174ae30721ee22637c48eb723129d6d67c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 13 Oct 2024 07:17:16 -0800 Subject: [PATCH 415/589] splitting model still work in progress as transformers still seems to try to load more than needed even witha modified safetensor json file, finished up PR main updates but will continue on this one --- exo/download/hf/hf_helpers.py | 34 ++++++--------- exo/inference/torch/tests/test_split_model.py | 42 +++++++------------ 2 files changed, 28 insertions(+), 48 deletions(-) diff --git a/exo/download/hf/hf_helpers.py b/exo/download/hf/hf_helpers.py index 4c22763db..a548df2ee 100644 --- a/exo/download/hf/hf_helpers.py +++ b/exo/download/hf/hf_helpers.py @@ -394,27 +394,19 @@ def extract_layer_num(tensor_name: str) -> Optional[int]: def get_allow_patterns(weight_map: Dict[str, str], shard: Shard) -> 
List[str]: - default_patterns = set([ - "*.json", - "*.py", - "tokenizer.model", - "*.tiktoken", - "*.txt", - "*.safetensors" - ]) - + default_patterns = set(["*.json","*.py","tokenizer.model","*.tiktoken","*.txt"]) shard_specific_patterns = set() - #if weight_map: - # for tensor_name, filename in weight_map.items(): - # layer_num = extract_layer_num(tensor_name) - # if layer_num is not None and shard.start_layer <= layer_num <= shard.end_layer: - # shard_specific_patterns.add(filename) - # sorted_file_names = sorted(weight_map.values()) - # if shard.is_first_layer(): - # shard_specific_patterns.add(sorted_file_names[0]) - # elif shard.is_last_layer(): - # shard_specific_patterns.add(sorted_file_names[-1]) - #else: - #shard_specific_patterns = set("*.safetensors") + if weight_map: + for tensor_name, filename in weight_map.items(): + layer_num = extract_layer_num(tensor_name) + if layer_num is not None and shard.start_layer <= layer_num <= shard.end_layer: + shard_specific_patterns.add(filename) + sorted_file_names = sorted(weight_map.values()) + if shard.is_first_layer(): + shard_specific_patterns.add(sorted_file_names[0]) + elif shard.is_last_layer(): + shard_specific_patterns.add(sorted_file_names[-1]) + else: + shard_specific_patterns = set("*.safetensors") if DEBUG >= 2: print(f"get_allow_patterns {weight_map=} {shard=} {shard_specific_patterns=}") return list(default_patterns | shard_specific_patterns) diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index 46a3b6a4b..cab9f2211 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -8,10 +8,7 @@ from pathlib import Path from typing import Optional -from exo.download.hf.hf_helpers import ( - get_weight_map, - download_repo_files -) +from exo.download.hf.hf_helpers import get_weight_map from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.shard import Shard @@ -33,7 +30,7 @@ async def load_model( if weight_map: layer_weight_map = {} - skip_layers = [] + non_layer_weights = [] for wname, wtensor in weight_map.items(): # get layer number @@ -47,28 +44,20 @@ async def load_model( layer_idx = int(layer_found[0]) if shard.start_layer <= layer_idx <= shard.end_layer: layer_weight_map[wname] = wtensor - else: - skip_layers.append(wname) - print(f"SKIPPING LAYER {layer_idx}") - - if wname not in skip_layers: - print(f"adding non-layer: {wname}") - layer_weight_map[wname] = wtensor - - # will manipulate current model.safetensors.index.json - # but set back at end of inference - print(layer_weight_map) + else: + non_layer_weights.append((wname, wtensor)) + + if shard.is_first_layer(): + # this assumes at max only one first weight non-layer for model + first_weight = non_layer_weights[0] + layer_weight_map[first_weight[0]] = first_weight[1] + elif shard.is_last_layer(): + last_weights = non_layer_weights[1:] + for last_weight in last_weights: + layer_weight_map[last_weight[0]] = last_weight[1] # rewrite model.safetensors.index.json try: - # call download repo files again to reload original safetensors json - #os.remove(model_st_snapshot) - - #await download_repo_files( - # repo_id=shard.model_id, - # revision="main", - # allow_patterns="model.safetensors.index.json") - mst_json = {} with open(model_st_snapshot, "r") as mst_file: mst_json = json.load(mst_file) @@ -81,7 +70,6 @@ async def load_model( with open(model_st_snapshot, "w") as mst_file: json.dump(mst_json, mst_file, indent=4) - 
print(f"{model_st_snapshot} rewritten with {shard.n_layers} weights") except Exception as err: print(f"err: {err}") raise @@ -109,7 +97,7 @@ async def test_split_model( end_layer: int, n_layers: int ): - """ + """ Test to load split models """ @@ -142,7 +130,7 @@ async def test_split_model( asyncio.run(test_split_model( "Qwen/Qwen2.5-3B-Instruct", 0, - 18, + 3, 36 )) except Exception as err: From 3a2c431c3102b2a9e2a4bf934bca4e15f001b1e5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 13 Oct 2024 07:35:42 -0800 Subject: [PATCH 416/589] updating readme --- exo/inference/torch/README.md | 41 ++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/exo/inference/torch/README.md b/exo/inference/torch/README.md index 5cbeeef60..c80d35c23 100644 --- a/exo/inference/torch/README.md +++ b/exo/inference/torch/README.md @@ -1,9 +1,44 @@ # PyTorch & HuggingFace inference engine +## Tech + +Tested on + +```bash +# Laptop/PC +Distributor ID: Pop +Description: Pop!_OS 22.04 LTS +Release: 22.04 +Codename: jammy +CUDA Version: 12.4 +Nvidia Driver Version: 550.107.02 + +GPU 1: Nvidia GeForce RTX 3060 6GB Laptop +``` +```bash +# Server +Distributor ID: Pop +Description: Pop!_OS 22.04 LTS +Release: 22.04 +Codename: jammy +CUDA Version: 12.4 +Nvidia Driver Version: 550.90.07 + +GPU 1: NVIDIA T1000 8GB +GPU 2: NVIDIA Quadro M2000 4GB +GPU 3: NVIDIA Quadro M2000 4GB +GPU 4: NVIDIA Quadro P400 2GB +GPU 5: NVIDIA Quadro P400 2GB +``` + + ## Notes/Issues ### 10/10/2024 - To select a pytorch device via environment variables, set the variable TORCH_DEVICE -- - XLA is currently not installed and will need to be added to inference.py, looking into doing this on a TPU VM -- - With pytorch, CUDA and ROCm are the same so specifying CUDA also enables ROCm support. See this [post](https://github.com/pytorch/pytorch/issues/55223#issuecomment-812587373) -- - Looking into adding mobile device support properly + - XLA is currently not installed and will need to be added to inference.py, looking into doing this on a TPU VM + - With pytorch, CUDA and ROCm are the same so specifying CUDA also enables ROCm support. See this [post](https://github.com/pytorch/pytorch/issues/55223#issuecomment-812587373) +- Looking into adding mobile device support properly - If device is not CPU the data type defaults to float32 else float16. + +### 10/13/2024 +Still working on split model development (see test_split_model.py). Right now, it seems to do it but still transformers is loading more in the RAM and GPU as it loads up a larger models (causing an OOM). Will research and add to next update. Right now, tests are added and are in development. 
From 6c6e7b2c3bffa0f8bd9c59b01339f1ccb92d9140 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 13 Oct 2024 21:41:04 -0800 Subject: [PATCH 417/589] successful splitting model test with only loading needed weights, implementing it in main inference code --- exo/inference/torch/tests/test_split_model.py | 85 ++++++++++++++++--- 1 file changed, 71 insertions(+), 14 deletions(-) diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index cab9f2211..c8c4f3a70 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -8,17 +8,35 @@ from pathlib import Path from typing import Optional +import torch + +from transformers.modeling_utils import offload_weight + from exo.download.hf.hf_helpers import get_weight_map from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.shard import Shard -from transformers import AutoModelForCausalLM +from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer + +def print_ram_stats(): + if torch.cuda.is_available(): + allocated_memory = torch.cuda.memory_allocated() + max_memory = torch.cuda.max_memory_allocated() + cached_memory = torch.cuda.memory_reserved() + + print("Cuda stats") + print(f'Allocated memory: {allocated_memory / 1024**2} MB') + print(f'Max allocated memory: {max_memory / 1024**2} MB') + print(f'Cached memory: {cached_memory / 1024**2} MB') + + async def load_model( repo_id: str, shard: Shard, model_path: Path, - weight_map: Optional[dict] + weight_map: Optional[dict], + device: Optional[str] = "cuda" ) -> Optional[AutoModelForCausalLM]: """ load model by layer and safetensors @@ -61,7 +79,6 @@ async def load_model( mst_json = {} with open(model_st_snapshot, "r") as mst_file: mst_json = json.load(mst_file) - mst_json["weight_map"] = layer_weight_map print(f"mst_json: {json.dumps(mst_json, indent=4)}") @@ -77,27 +94,70 @@ async def load_model( else: print("weight_map not found, loading whole model") + # setup the weight range for init_weights + shard_num_hidden_layers = shard.end_layer - shard.start_layer + print(f"Setting up LLM config with {shard_num_hidden_layers} hidden layers") + llm_config = AutoConfig.from_pretrained( + pretrained_model_name_or_path=model_path, + device_map="cuda", + offload_buffers=True, + local_files_only=True, + num_hidden_layers=shard_num_hidden_layers + ) + # load model with layer edits # or whole model if no weight_map - shard_model = AutoModelForCausalLM.from_pretrained( - model_path, - device_map="auto", - offload_buffers=True + print(f"Loading sharded AutoModelForCausalLM from {model_path}") + shard_model = AutoModelForCausalLM.from_config(llm_config).to(device) + + print("Loading tokenizer") + tokenizer = AutoTokenizer.from_pretrained( + pretrained_model_name_or_path=model_path, + local_files_only=True, ) + print_ram_stats() + + prompt = "In a single word only, what color is a red apple?" 
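+  # flow of the check below: tokenize the prompt, let the shard-built model
+  # generate up to 512 new tokens, then slice off the prompt tokens before
+  # decoding so only the completion is printed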
+ + model_inputs = tokenizer( + [prompt], + return_tensors="pt" + ) + + generated_ids = shard_model.generate( + model_inputs.input_ids.to(device), + attention_mask=model_inputs.attention_mask.to(device), + max_new_tokens=512, + do_sample=True + ) + + generated_ids = [ + output_ids[len(input_ids):] for input_ids, output_ids in zip( + model_inputs.input_ids, + generated_ids + ) + ] + + response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] + + print(f"Prompt: {prompt}\n") + print(f"Response: {response}\n") + + print_ram_stats() + # have to clear out edited model safetensors mst_json os.remove(model_st_snapshot) return shard_model - async def test_split_model( model_id: str, start_layer: int, end_layer: int, n_layers: int ): - """ + """ Test to load split models """ @@ -108,9 +168,6 @@ async def test_split_model( n_layers=n_layers ) - # remove old weight json if present - - print(f"loading shard: {shard}") shard_downloader = HFShardDownloader() model_path = await shard_downloader.ensure_shard(shard) @@ -130,11 +187,11 @@ async def test_split_model( asyncio.run(test_split_model( "Qwen/Qwen2.5-3B-Instruct", 0, - 3, + 6, 36 )) except Exception as err: - print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") + print(f"\n\n !!!!!!!!!!! Qwen/Qwen2.5-3B-Instruct TEST FAILED \n{err}\n") # unsloth/Meta-Llama-3.1-8B-Instruct #try: From aacdeb595e8a47e9d8a0d45e8e3ac84c92efa4e5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 14 Oct 2024 00:44:19 -0800 Subject: [PATCH 418/589] adding model sharding to inference engine, doing testing with inference engine and sharding --- exo/inference/torch/inference.py | 11 +- exo/inference/torch/model/hf.py | 110 ++++++++++-- .../torch/tests/test_inference_engine.py | 157 +++++++++--------- exo/inference/torch/tests/test_split_model.py | 42 ++--- exo/inference/torch/utils.py | 49 ++++++ 5 files changed, 252 insertions(+), 117 deletions(-) create mode 100644 exo/inference/torch/utils.py diff --git a/exo/inference/torch/inference.py b/exo/inference/torch/inference.py index 52db0c0ae..63f29284f 100644 --- a/exo/inference/torch/inference.py +++ b/exo/inference/torch/inference.py @@ -60,8 +60,11 @@ def __init__(self, shard_downloader: HFShardDownloader): # setup cude dtype self.dtype = torch.get_default_dtype() - # setup threadding - torch.set_num_threads(torch.get_num_threads()) + # setup device_map + if os.environ.get("TORCH_DEVICE_MAP"): + self.device_map = os.environ["TORCH_DEVICE_MAP"] + else: + self.device_map = str(self.device) def infer_caching( self, @@ -351,13 +354,13 @@ async def ensure_shard(self, shard: Shard): # get model weight map model_wm = await get_weight_map(repo_id=shard.model_id) - print(f"model_wm: {model_wm}") - self.stateful_sharded_model = ShardedHuggingFaceModel( shard=shard, local_model_path=model_path, + weight_map=model_wm, device=self.device, dtype=self.dtype, + device_map=self.device_map, top_k=TOP_K, temp=TEMP, top_p=TOP_P diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 9d524de00..c188a97a7 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -1,12 +1,18 @@ +import os +import json +from typing import Tuple, Optional, Union, List +from pathlib import Path + import torch import torch.nn as nn -from typing import Tuple, Optional, Union, List from exo.inference.shard import Shard from exo.helpers import DEBUG +from exo.inference.torch.utils import extract_layers from transformers import ( - AutoModelForCausalLM, + 
AutoConfig, + AutoModelForCausalLM, DynamicCache, Cache, LogitsProcessorList, @@ -22,12 +28,15 @@ class ShardedHuggingFaceModel: def __init__( self, shard: Shard, - local_model_path, - device, - dtype, + local_model_path: Path, + weight_map: Optional[dict], + device: torch.device, + dtype: torch.dtype, + device_map: str, top_k: int = 25, temp: float = 0.7, - top_p: float = 0.9 + top_p: float = 0.9, + offload_buffers: bool = True ): """ Initializes the ShardedHuggingFaceModel with a specified shard, model path, and device. @@ -64,21 +73,96 @@ def __init__( self.device = device self.dtype = dtype + self.device_map = device_map + + self.offload_buffers = offload_buffers + + self.model_safetensors_path = self.local_model_path/"model.safetensors.index.json" # setup pytorch and transformer llm try: - self.llm_model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path=self.local_model_path, - torch_dtype=self.dtype, - device_map="auto", - offload_buffers=True - ) + if weight_map: + self.llm_model_config = self.load_sharded_model( + shard, + weight_map, + offload_buffers=self.offload_buffers + ) - self.model = self.llm_model.model + # clear out edited safetensor json + # this is needed because shard downloader just + # appends and not redownloads the file + os.remove(self.model_safetensors_path) + else: + self.llm_model_config = AutoConfig.from_pretrained( + pretrained_model_name_or_path=self.local_model_path, + torch_dtype=self.dtype, + device_map=self.device_map, + offload_buffers=self.offload_buffers + ) + + self.llm_model = AutoModelForCausalLM.from_config(self.llm_model_config).to(self.device) + + self.model = self.llm_model.model.to(self.device) except Exception as err: print(f"error loading and splitting model: {err}") raise + def load_sharded_model( + self, + shard: Shard, + weight_map: dict, + offload_buffers: bool + ) -> AutoConfig: + """ + Loads sharded version of model where only needed + weights are loaded for necessary layers + + Args: + + Returns: + """ + if DEBUG >= 4: + print("load_sharded_model called") + print(f"shard: {shard}") + + # break out layers per shard range + layer_weight_map = extract_layers( + weight_map, + shard + ) + + # rewrite model.safetensors.index.json for only needed layers + try: + mst_json = {} + with open(self.model_safetensors_path, "r") as mst_file: + mst_json = json.load(mst_file) + mst_json["weight_map"] = layer_weight_map + + if DEBUG >= 4: + print(f"rewritten safetensor index \n{json.dumps(mst_json, indent=4)}") + + os.remove(self.model_safetensors_path) + + with open(self.model_safetensors_path, "w") as mst_file: + json.dump(mst_json, mst_file, indent=4) + except Exception as err: + print(f"err: {err}") + raise + + # load model + try: + shard_num_hidden_layers = shard.end_layer - shard.start_layer + return AutoConfig.from_pretrained( + pretrained_model_name_or_path=self.local_model_path, + device_map=self.device_map, + offload_buffers=offload_buffers, + local_files_only=True, + num_hidden_layers=shard_num_hidden_layers + ) + except Exception as err: + print(f"err: {err}") + raise + def forward( self, input_ids: Optional[torch.Tensor] = None, diff --git a/exo/inference/torch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_inference_engine.py index a03c5c9a8..e102af694 100644 --- a/exo/inference/torch/tests/test_inference_engine.py +++ b/exo/inference/torch/tests/test_inference_engine.py @@ -15,59 +15,57 @@ async def test_inference_engine( inference_engine_2: InferenceEngine, model_id: str, n_layers: int): - + prompt 
= "In a single word only, what is the last name of the current president of the USA?" - shard = Shard( +# shard = Shard( +# model_id=model_id, +# start_layer=0, +# end_layer=n_layers-1, +# n_layers=n_layers +# ) +# +# resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( +# "A", +# shard=shard, +# prompt=prompt +# ) +# +# print("\n------------resp_full---------------\n") +# print(resp_full) +# print("\n------------resp_full---------------\n") +# +# time.sleep(5) +# +# next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( +# "A", +# shard=shard, +# input_data=resp_full, +# inference_state=inference_state_full, +# ) +# +# print("\n------------next_resp_full---------------\n") +# print(next_resp_full) +# print("\n------------next_resp_full---------------\n") +# +# time.sleep(5) + + resp_shard = Shard( model_id=model_id, start_layer=0, - end_layer=n_layers-1, + end_layer=1, n_layers=n_layers ) - resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( - "A", - shard=shard, - prompt=prompt - ) - - print("\n------------resp_full---------------\n") - print(resp_full) - print("\n------------resp_full---------------\n") - - time.sleep(5) - - next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( - "A", - shard=shard, - input_data=resp_full, - inference_state=inference_state_full, - ) - - print("\n------------next_resp_full---------------\n") - print(next_resp_full) - print("\n------------next_resp_full---------------\n") - - time.sleep(5) - - pp = int(n_layers/2) - - resp_shard = Shard( - model_id=model_id, - start_layer=0, - end_layer=pp, - n_layers=n_layers - ) - - resp_shard2 = Shard( - model_id=model_id, - start_layer=pp + 1, - end_layer=n_layers-1, - n_layers=n_layers - ) + #resp_shard2 = Shard( + # model_id=model_id, + # start_layer=3, + # end_layer=5, + # n_layers=n_layers + #) resp1, inference_state_1, _ = await inference_engine_1.infer_prompt( - "B", + "B", shard=resp_shard, prompt=prompt ) @@ -78,42 +76,41 @@ async def test_inference_engine( time.sleep(5) - - resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( - "B", - shard=resp_shard2, - input_data=resp1, - inference_state=inference_state_1, - ) - - print("\n------------resp2---------------\n") - print(resp2) - print("\n------------resp2---------------\n") - - resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( - "B", - shard=resp_shard, - input_data=resp2, - inference_state=inference_state_2, - ) - - print("\n------------resp3---------------\n") - print(resp3) - print("\n------------resp3---------------\n") - - resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( - "B", - shard=resp_shard2, - input_data=resp3, - inference_state=inference_state_3, - ) - - print("\n------------resp4---------------\n") - print(resp4) - print("\n------------resp4---------------\n") - - assert np.array_equal(resp_full, resp2) - assert np.array_equal(next_resp_full, resp4) + #resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( + # "B", + # shard=resp_shard2, + # input_data=resp1, + # inference_state=inference_state_1, + #) + + #print("\n------------resp2---------------\n") + #print(resp2) + #print("\n------------resp2---------------\n") + + #resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( + # "B", + # shard=resp_shard, + # input_data=resp2, + # inference_state=inference_state_2, + #) + + #print("\n------------resp3---------------\n") + #print(resp3) + 
#print("\n------------resp3---------------\n") + + #resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( + # "B", + # shard=resp_shard2, + # input_data=resp3, + # inference_state=inference_state_3, + #) + + #print("\n------------resp4---------------\n") + #print(resp4) + #print("\n------------resp4---------------\n") + + #assert np.array_equal(resp_full, resp2) + #assert np.array_equal(next_resp_full, resp4) if __name__ == '__main__': # try: diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index c8c4f3a70..0afc3ed35 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -29,9 +29,7 @@ def print_ram_stats(): print(f'Max allocated memory: {max_memory / 1024**2} MB') print(f'Cached memory: {cached_memory / 1024**2} MB') - - -async def load_model( +def load_model( repo_id: str, shard: Shard, model_path: Path, @@ -65,6 +63,10 @@ async def load_model( else: non_layer_weights.append((wname, wtensor)) + non_layer_weights = sorted(non_layer_weights, key=lambda x: x[1]) + + print(f"sorted non_layer_weights: {non_layer_weights}") + if shard.is_first_layer(): # this assumes at max only one first weight non-layer for model first_weight = non_layer_weights[0] @@ -173,7 +175,7 @@ async def test_split_model( model_path = await shard_downloader.ensure_shard(shard) weight_map = await get_weight_map(model_id) - await load_model( + load_model( model_id, shard, model_path, @@ -182,25 +184,25 @@ async def test_split_model( if __name__ == "__main__": #Qwen/Qwen2.5-3B + #try: + # print("\n-------- Test Qwen/Qwen2.5-3B-Instruct ----------\n") + # asyncio.run(test_split_model( + # "Qwen/Qwen2.5-3B-Instruct", + # 0, + # 6, + # 36 + # )) + #except Exception as err: + # print(f"\n\n !!!!!!!!!!! Qwen/Qwen2.5-3B-Instruct TEST FAILED \n{err}\n") + + # unsloth/Meta-Llama-3.1-8B-Instruct try: - print("\n-------- Test Qwen/Qwen2.5-3B-Instruct ----------\n") + print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") asyncio.run(test_split_model( - "Qwen/Qwen2.5-3B-Instruct", + "unsloth/Meta-Llama-3.1-8B-Instruct", 0, 6, - 36 + 32 )) except Exception as err: - print(f"\n\n !!!!!!!!!!! Qwen/Qwen2.5-3B-Instruct TEST FAILED \n{err}\n") - - # unsloth/Meta-Llama-3.1-8B-Instruct - #try: - # print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") - # asyncio.run(test_split_model( - # "unsloth/Meta-Llama-3.1-8B-Instruct", - # 0, - # 1, - # 32 - # )) - #except Exception as err: - # print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") + print(f"\n\n !!!!!!!!!!! 
meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") diff --git a/exo/inference/torch/utils.py b/exo/inference/torch/utils.py new file mode 100644 index 000000000..994daeebe --- /dev/null +++ b/exo/inference/torch/utils.py @@ -0,0 +1,49 @@ +""" +Utility functions to be used by inference engine +and model +""" +import re + +from exo.inference.shard import Shard + +def extract_layers( + weight_map: dict, + shard: Shard +) -> dict: + """ + Extract layers from weight map in range + + Args: + + Returns: + """ + + layer_rgx = r'^model\.layers\.(\d+)\.*' + layer_weight_map = {} + non_layer_weights = [] + + for wname, wtensor in weight_map.items(): + layer_found = re.findall(layer_rgx, wname) + if layer_found: + layer_idx = int(layer_found[0]) + if shard.start_layer <= layer_idx <= shard.end_layer: + layer_weight_map[wname] = wtensor + else: + non_layer_weights.append((wname, wtensor)) + + non_layer_weights = sorted(non_layer_weights, key=lambda x: x[1]) + + print(non_layer_weights) + print(f"first: {shard.is_first_layer()}") + print(f"last: {shard.is_last_layer()}") + + if shard.is_first_layer(): + # this assumes at max only one first weight non-layer for model + first_weight = non_layer_weights[0] + layer_weight_map[first_weight[0]] = first_weight[1] + elif shard.is_last_layer(): + last_weights = non_layer_weights[1:] + for last_weight in last_weights: + layer_weight_map[last_weight[0]] = last_weight[1] + + return layer_weight_map From ce702d1301a5bd59a1b53a65c7c8e21f4bd768b7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 14 Oct 2024 00:54:02 -0800 Subject: [PATCH 419/589] fixing layer range issue --- exo/inference/torch/model/hf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index c188a97a7..a8def187b 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -261,11 +261,13 @@ def forward( if DEBUG >= 4: print(f"hidden_states: {self.hidden_states}") + print(f"model layer amt: {len(self.model.layers)}") print(f"layer_amt: {layer_amt}") for i in layer_amt: decoder_layer = self.model.layers[i] if DEBUG >= 5: + print(f"layer #{i}") print("decoder_layer before") print(f"decoder_layer: {decoder_layer}") print(f"hidden_states: {self.hidden_states}") From e387a797fef33b40def3faef6ceb9239806b7308 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 14 Oct 2024 01:00:56 -0800 Subject: [PATCH 420/589] fixing layer range issue --- exo/inference/torch/model/hf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index a8def187b..ebe12d7d6 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -152,6 +152,8 @@ def load_sharded_model( # load model try: shard_num_hidden_layers = shard.end_layer - shard.start_layer + if DEBUG >= 4: + print(f"config with {shard_num_hidden_layers} layers") return AutoConfig.from_pretrained( pretrained_model_name_or_path=self.local_model_path, device_map=self.device_map, From e0ba2bb4182b4c2afe54a538d5dc7642c0d1ad4b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 14 Oct 2024 01:03:53 -0800 Subject: [PATCH 421/589] fixing layer range issue --- exo/inference/torch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index ebe12d7d6..ed3ca2a9d 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -259,7 +259,7 @@ def forward( 
print(f"model_inputs: {model_inputs}") # run through decoder layers - layer_amt = range(self.shard.start_layer, self.shard.end_layer + 1) + layer_amt = range(self.shard.start_layer, self.shard.end_layer) if DEBUG >= 4: print(f"hidden_states: {self.hidden_states}") From 5b9638f249a4137c0d1e93fc3a81edfc71731aec Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 14 Oct 2024 01:07:14 -0800 Subject: [PATCH 422/589] checking if ram over usaage even if reducing layers on large models --- exo/inference/torch/tests/test_inference_engine.py | 8 ++++++-- exo/inference/torch/utils.py | 13 +++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/exo/inference/torch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_inference_engine.py index e102af694..93960afea 100644 --- a/exo/inference/torch/tests/test_inference_engine.py +++ b/exo/inference/torch/tests/test_inference_engine.py @@ -1,14 +1,16 @@ """ Test inference engine and model sharding """ - +import time import asyncio + from exo.inference.shard import Shard from exo.inference.torch.inference import TorchDynamicShardInferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.inference_engine import InferenceEngine +from exo.inference.torch.utils import print_ram_stats + import numpy as np -import time async def test_inference_engine( inference_engine_1: InferenceEngine, @@ -64,6 +66,7 @@ async def test_inference_engine( # n_layers=n_layers #) + print_ram_stats() resp1, inference_state_1, _ = await inference_engine_1.infer_prompt( "B", shard=resp_shard, @@ -74,6 +77,7 @@ async def test_inference_engine( print(resp1) print("\n------------resp1---------------\n") + print_ram_stats() time.sleep(5) #resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( diff --git a/exo/inference/torch/utils.py b/exo/inference/torch/utils.py index 994daeebe..4ad922536 100644 --- a/exo/inference/torch/utils.py +++ b/exo/inference/torch/utils.py @@ -6,6 +6,8 @@ from exo.inference.shard import Shard +import torch + def extract_layers( weight_map: dict, shard: Shard @@ -47,3 +49,14 @@ def extract_layers( layer_weight_map[last_weight[0]] = last_weight[1] return layer_weight_map + +def print_ram_stats(): + if torch.cuda.is_available(): + allocated_memory = torch.cuda.memory_allocated() + max_memory = torch.cuda.max_memory_allocated() + cached_memory = torch.cuda.memory_reserved() + + print("Cuda stats") + print(f'Allocated memory: {allocated_memory / 1024**2} MB') + print(f'Max allocated memory: {max_memory / 1024**2} MB') + print(f'Cached memory: {cached_memory / 1024**2} MB') From 664f29f3e720f89744ef05e4b881d5662675e528 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 14 Oct 2024 01:23:18 -0800 Subject: [PATCH 423/589] half layer inference engine testing --- .../torch/tests/test_inference_engine.py | 74 ++++++++++--------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/exo/inference/torch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_inference_engine.py index 93960afea..2d68001f6 100644 --- a/exo/inference/torch/tests/test_inference_engine.py +++ b/exo/inference/torch/tests/test_inference_engine.py @@ -52,19 +52,21 @@ async def test_inference_engine( # # time.sleep(5) + half_layer = int(n_layers/2) + resp_shard = Shard( model_id=model_id, start_layer=0, - end_layer=1, + end_layer=half_layer, n_layers=n_layers ) - #resp_shard2 = Shard( - # model_id=model_id, - # start_layer=3, - # end_layer=5, - # n_layers=n_layers - #) + resp_shard2 = Shard( + 
model_id=model_id, + start_layer=half_layer+1, + end_layer=n_layers-1, + n_layers=n_layers + ) print_ram_stats() resp1, inference_state_1, _ = await inference_engine_1.infer_prompt( @@ -80,16 +82,16 @@ async def test_inference_engine( print_ram_stats() time.sleep(5) - #resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( - # "B", - # shard=resp_shard2, - # input_data=resp1, - # inference_state=inference_state_1, - #) + resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( + "B", + shard=resp_shard2, + input_data=resp1, + inference_state=inference_state_1, + ) - #print("\n------------resp2---------------\n") - #print(resp2) - #print("\n------------resp2---------------\n") + print("\n------------resp2---------------\n") + print(resp2) + print("\n------------resp2---------------\n") #resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( # "B", @@ -117,16 +119,16 @@ async def test_inference_engine( #assert np.array_equal(next_resp_full, resp4) if __name__ == '__main__': - # try: - # print("\n\n -------- TEST QWEN2 -------- \n\n") - # asyncio.run(test_inference_engine( - # TorchDynamicShardInferenceEngine(HFShardDownloader()), - # TorchDynamicShardInferenceEngine(HFShardDownloader()), - # "Qwen/Qwen2-0.5B-Instruct", - # 24 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") + try: + print("\n\n -------- TEST Qwen/Qwen2.5-3B-Instruct -------- \n\n") + asyncio.run(test_inference_engine( + TorchDynamicShardInferenceEngine(HFShardDownloader()), + TorchDynamicShardInferenceEngine(HFShardDownloader()), + "Qwen/Qwen2.5-3B-Instruct", + 36 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") #try: # print("\n-------- Test unsloth/Llama-3.2-1B-Instruct ----------\n") @@ -139,15 +141,15 @@ async def test_inference_engine( #except Exception as err: # print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") - try: - print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") - asyncio.run(test_inference_engine( - TorchDynamicShardInferenceEngine(HFShardDownloader()), - TorchDynamicShardInferenceEngine(HFShardDownloader()), - "unsloth/Meta-Llama-3.1-8B-Instruct", - 32 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") + #try: + # print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") + # asyncio.run(test_inference_engine( + # TorchDynamicShardInferenceEngine(HFShardDownloader()), + # TorchDynamicShardInferenceEngine(HFShardDownloader()), + # "unsloth/Meta-Llama-3.1-8B-Instruct", + # 32 + # )) + #except Exception as err: + # print(f"\n\n !!!!!!!!!!! 
unsloth/Llama-3.1-8B-Instruct TEST FAILED \n{err}\n") From 2591fab98103666910626d3b89513122a8fbef87 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 14 Oct 2024 01:28:43 -0800 Subject: [PATCH 424/589] fixing layer amount with sharded modeling --- exo/inference/torch/model/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index ed3ca2a9d..3b6fefc28 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -259,7 +259,7 @@ def forward( print(f"model_inputs: {model_inputs}") # run through decoder layers - layer_amt = range(self.shard.start_layer, self.shard.end_layer) + layer_amt = range(self.shard.end_layer - self.shard.start_layer) if DEBUG >= 4: print(f"hidden_states: {self.hidden_states}") From 99dac57b17df542bfb670a98d90bbb8a7410ea01 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 14 Oct 2024 01:36:55 -0800 Subject: [PATCH 425/589] adding qwen2.5 3B for testing --- exo/models.py | 4 ++++ exo/tinychat/index.html | 1 + 2 files changed, 5 insertions(+) diff --git a/exo/models.py b/exo/models.py index fe608f05e..a8a482df8 100644 --- a/exo/models.py +++ b/exo/models.py @@ -71,6 +71,10 @@ "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Qwen2.5-Coder-1.5B-Instruct", start_layer=0, end_layer=0, n_layers=28), }, + "qwen-2.5-3B-Instruct": { + "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-3B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=36), + "TorchDynamicShardInferenceEngine": Shard(model_id="Qwen/Qwen2.5-3B-Instruct", start_layer=0, end_layer=0, n_layers=36), + }, "qwen-2.5-coder-7b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-7B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Qwen2.5-Coder-7B-Instruct", start_layer=0, end_layer=0, n_layers=28), diff --git a/exo/tinychat/index.html b/exo/tinychat/index.html index e9be92184..8d4d4ee8b 100644 --- a/exo/tinychat/index.html +++ b/exo/tinychat/index.html @@ -44,6 +44,7 @@ + From 493cd3e38bcd4eedc35d6f0f8e2956014f8d22f8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 14 Oct 2024 02:00:02 -0800 Subject: [PATCH 426/589] updating inference engine test --- .../torch/tests/test_inference_engine.py | 144 ++++++++---------- 1 file changed, 65 insertions(+), 79 deletions(-) diff --git a/exo/inference/torch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_inference_engine.py index 2d68001f6..2d24c8b28 100644 --- a/exo/inference/torch/tests/test_inference_engine.py +++ b/exo/inference/torch/tests/test_inference_engine.py @@ -8,7 +8,6 @@ from exo.inference.torch.inference import TorchDynamicShardInferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.inference_engine import InferenceEngine -from exo.inference.torch.utils import print_ram_stats import numpy as np @@ -20,37 +19,37 @@ async def test_inference_engine( prompt = "In a single word only, what is the last name of the current president of the USA?" 
-# shard = Shard( -# model_id=model_id, -# start_layer=0, -# end_layer=n_layers-1, -# n_layers=n_layers -# ) -# -# resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( -# "A", -# shard=shard, -# prompt=prompt -# ) -# -# print("\n------------resp_full---------------\n") -# print(resp_full) -# print("\n------------resp_full---------------\n") -# -# time.sleep(5) -# -# next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( -# "A", -# shard=shard, -# input_data=resp_full, -# inference_state=inference_state_full, -# ) -# -# print("\n------------next_resp_full---------------\n") -# print(next_resp_full) -# print("\n------------next_resp_full---------------\n") -# -# time.sleep(5) + shard = Shard( + model_id=model_id, + start_layer=0, + end_layer=n_layers-1, + n_layers=n_layers + ) + + resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( + "A", + shard=shard, + prompt=prompt + ) + + print("\n------------resp_full---------------\n") + print(resp_full) + print("\n------------resp_full---------------\n") + + time.sleep(5) + + next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( + "A", + shard=shard, + input_data=resp_full, + inference_state=inference_state_full, + ) + + print("\n------------next_resp_full---------------\n") + print(next_resp_full) + print("\n------------next_resp_full---------------\n") + + time.sleep(5) half_layer = int(n_layers/2) @@ -68,7 +67,6 @@ async def test_inference_engine( n_layers=n_layers ) - print_ram_stats() resp1, inference_state_1, _ = await inference_engine_1.infer_prompt( "B", shard=resp_shard, @@ -79,7 +77,6 @@ async def test_inference_engine( print(resp1) print("\n------------resp1---------------\n") - print_ram_stats() time.sleep(5) resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( @@ -93,63 +90,52 @@ async def test_inference_engine( print(resp2) print("\n------------resp2---------------\n") - #resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( - # "B", - # shard=resp_shard, - # input_data=resp2, - # inference_state=inference_state_2, - #) + resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( + "B", + shard=resp_shard, + input_data=resp2, + inference_state=inference_state_2, + ) - #print("\n------------resp3---------------\n") - #print(resp3) - #print("\n------------resp3---------------\n") + print("\n------------resp3---------------\n") + print(resp3) + print("\n------------resp3---------------\n") - #resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( - # "B", - # shard=resp_shard2, - # input_data=resp3, - # inference_state=inference_state_3, - #) + resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( + "B", + shard=resp_shard2, + input_data=resp3, + inference_state=inference_state_3, + ) - #print("\n------------resp4---------------\n") - #print(resp4) - #print("\n------------resp4---------------\n") + print("\n------------resp4---------------\n") + print(resp4) + print("\n------------resp4---------------\n") - #assert np.array_equal(resp_full, resp2) - #assert np.array_equal(next_resp_full, resp4) + assert np.array_equal(resp_full, resp2) + assert np.array_equal(next_resp_full, resp4) if __name__ == '__main__': - try: - print("\n\n -------- TEST Qwen/Qwen2.5-3B-Instruct -------- \n\n") - asyncio.run(test_inference_engine( - TorchDynamicShardInferenceEngine(HFShardDownloader()), - TorchDynamicShardInferenceEngine(HFShardDownloader()), - 
"Qwen/Qwen2.5-3B-Instruct", - 36 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") - #try: - # print("\n-------- Test unsloth/Llama-3.2-1B-Instruct ----------\n") + # print("\n\n -------- TEST Qwen/Qwen2.5-3B-Instruct -------- \n\n") # asyncio.run(test_inference_engine( # TorchDynamicShardInferenceEngine(HFShardDownloader()), # TorchDynamicShardInferenceEngine(HFShardDownloader()), - # "unsloth/Llama-3.2-1B-Instruct", - # 24 + # "Qwen/Qwen2.5-3B-Instruct", + # 36 # )) #except Exception as err: - # print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") + # print(f"\n!!!! QWEN2 TEST FAILED \n{err}\n") - #try: - # print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") - # asyncio.run(test_inference_engine( - # TorchDynamicShardInferenceEngine(HFShardDownloader()), - # TorchDynamicShardInferenceEngine(HFShardDownloader()), - # "unsloth/Meta-Llama-3.1-8B-Instruct", - # 32 - # )) - #except Exception as err: - # print(f"\n\n !!!!!!!!!!! unsloth/Llama-3.1-8B-Instruct TEST FAILED \n{err}\n") + try: + print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") + asyncio.run(test_inference_engine( + TorchDynamicShardInferenceEngine(HFShardDownloader()), + TorchDynamicShardInferenceEngine(HFShardDownloader()), + "unsloth/Meta-Llama-3.1-8B-Instruct", + 32 + )) + except Exception as err: + print(f"\n!!!! unsloth/Meta-Llama-3.1-8B-Instruct TEST FAILED \n{err}\n") From de232946eaa5a023710372a6e0cc6a351185af99 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 14 Oct 2024 02:05:09 -0800 Subject: [PATCH 427/589] cleaning up utils and split model --- exo/inference/torch/tests/test_split_model.py | 15 +++---------- exo/inference/torch/utils.py | 22 ++++++++++--------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index 0afc3ed35..935f74dff 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -15,20 +15,10 @@ from exo.download.hf.hf_helpers import get_weight_map from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.shard import Shard +from exo.inference.torch.utils import print_cuda_vram_stats from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer -def print_ram_stats(): - if torch.cuda.is_available(): - allocated_memory = torch.cuda.memory_allocated() - max_memory = torch.cuda.max_memory_allocated() - cached_memory = torch.cuda.memory_reserved() - - print("Cuda stats") - print(f'Allocated memory: {allocated_memory / 1024**2} MB') - print(f'Max allocated memory: {max_memory / 1024**2} MB') - print(f'Cached memory: {cached_memory / 1024**2} MB') - def load_model( repo_id: str, shard: Shard, @@ -118,7 +108,8 @@ def load_model( local_files_only=True, ) - print_ram_stats() + if torch.cuda.is_available() and device == "cuda": + print_cuda_vram_stats() prompt = "In a single word only, what color is a red apple?" 
diff --git a/exo/inference/torch/utils.py b/exo/inference/torch/utils.py index 4ad922536..e5fc80e5a 100644 --- a/exo/inference/torch/utils.py +++ b/exo/inference/torch/utils.py @@ -50,13 +50,15 @@ def extract_layers( return layer_weight_map -def print_ram_stats(): - if torch.cuda.is_available(): - allocated_memory = torch.cuda.memory_allocated() - max_memory = torch.cuda.max_memory_allocated() - cached_memory = torch.cuda.memory_reserved() - - print("Cuda stats") - print(f'Allocated memory: {allocated_memory / 1024**2} MB') - print(f'Max allocated memory: {max_memory / 1024**2} MB') - print(f'Cached memory: {cached_memory / 1024**2} MB') +def print_cuda_vram_stats(): + """ + Prints CUDA VRAM stats being used by pytorch + """ + allocated_memory = torch.cuda.memory_allocated() + max_memory = torch.cuda.max_memory_allocated() + cached_memory = torch.cuda.memory_reserved() + + print("CUDA stats") + print(f'Allocated memory: {allocated_memory / 1024**2} MB') + print(f'Max allocated memory: {max_memory / 1024**2} MB') + print(f'Cached memory: {cached_memory / 1024**2} MB') From e7470b1a9ae643a6e41d7eabfc44c63f077c6afa Mon Sep 17 00:00:00 2001 From: Daniel Newman Date: Tue, 15 Oct 2024 13:55:33 -0400 Subject: [PATCH 428/589] bugfix in llm setup --- exo/inference/torch/model/hf.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 3b6fefc28..20f17ee1c 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -92,17 +92,18 @@ def __init__( # this is needed because shard downloader just # appends and not redownloads the file os.remove(self.model_safetensors_path) + + self.llm_model = AutoModelForCausalLM.from_config(self.llm_model_config).to(self.device) + self.model = self.llm_model.model.to(self.device) else: - self.llm_model_config = AutoConfig.from_pretrained( + self.llm_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=self.local_model_path, torch_dtype=self.dtype, device_map=self.device_map, offload_buffers=self.offload_buffers ) - - self.llm_model = AutoModelForCausalLM.from_config(self.llm_model_config).to(self.device) - - self.model = self.llm_model.model.to(self.device) + self.model = self.llm_model.model + except Exception as err: print(f"error loading and splitting model: {err}") raise From f5a1cef3ab509f8b13e0ae49365ec61c7ef4a306 Mon Sep 17 00:00:00 2001 From: Alex Cheema Date: Wed, 16 Oct 2024 15:02:27 -0700 Subject: [PATCH 429/589] handle range not satisfiable edge case --- exo/download/hf/hf_helpers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exo/download/hf/hf_helpers.py b/exo/download/hf/hf_helpers.py index a548df2ee..3d5349e57 100644 --- a/exo/download/hf/hf_helpers.py +++ b/exo/download/hf/hf_helpers.py @@ -173,6 +173,8 @@ async def download_file( if progress_callback: await progress_callback(RepoFileProgressEvent(repo_id, revision, file_path, downloaded_size, downloaded_this_session, total_size, 0, timedelta(0), "complete")) return + if DEBUG >= 2: print(f"Range not satisfiable {file_path=} {total_size=} {downloaded_size=}") + return await download_file(session, repo_id, revision, file_path, save_directory, progress_callback, use_range_request=False) except ValueError: if DEBUG >= 1: print(f"Failed to parse Content-Range header: {content_range}. 
Starting download from scratch...") return await download_file(session, repo_id, revision, file_path, save_directory, progress_callback, use_range_request=False) From 751bd1c3bbee3579e562538da85a3383d6332043 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 15:16:20 -0800 Subject: [PATCH 430/589] updating to use automodelforcausallm instead of autoconfig --- exo/inference/torch/model/hf.py | 16 ++++++++++------ exo/inference/torch/utils.py | 4 ---- exo/models.py | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 3b6fefc28..78e457c8b 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -82,6 +82,7 @@ def __init__( # setup pytorch and transformer llm try: if weight_map: + print("loading shard model") self.llm_model_config = self.load_sharded_model( shard, weight_map, @@ -92,15 +93,18 @@ def __init__( # this is needed because shard downloader just # appends and not redownloads the file os.remove(self.model_safetensors_path) + + self.llm_model = AutoModelForCausalLM.from_config(self.llm_model_config).to(self.device) else: - self.llm_model_config = AutoConfig.from_pretrained( + print("loading full model") + self.llm_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=self.local_model_path, torch_dtype=self.dtype, device_map=self.device_map, - offload_buffers=self.offload_buffers - ) + offload_buffers=True + ).to(self.device) - self.llm_model = AutoModelForCausalLM.from_config(self.llm_model_config).to(self.device) + self.model = self.llm_model.model.to(self.device) except Exception as err: @@ -112,7 +116,7 @@ def load_sharded_model( shard: Shard, weight_map: dict, offload_buffers: bool - ) -> AutoConfig: + ) -> AutoModelForCausalLM: """ Loads sharded version of model where only needed weights are loaded for necessary layers @@ -154,7 +158,7 @@ def load_sharded_model( shard_num_hidden_layers = shard.end_layer - shard.start_layer if DEBUG >= 4: print(f"config with {shard_num_hidden_layers} layers") - return AutoConfig.from_pretrained( + return AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=self.local_model_path, device_map=self.device_map, offload_buffers=offload_buffers, diff --git a/exo/inference/torch/utils.py b/exo/inference/torch/utils.py index e5fc80e5a..b9c4f1481 100644 --- a/exo/inference/torch/utils.py +++ b/exo/inference/torch/utils.py @@ -35,10 +35,6 @@ def extract_layers( non_layer_weights = sorted(non_layer_weights, key=lambda x: x[1]) - print(non_layer_weights) - print(f"first: {shard.is_first_layer()}") - print(f"last: {shard.is_last_layer()}") - if shard.is_first_layer(): # this assumes at max only one first weight non-layer for model first_weight = non_layer_weights[0] diff --git a/exo/models.py b/exo/models.py index a8a482df8..ab7977069 100644 --- a/exo/models.py +++ b/exo/models.py @@ -4,7 +4,7 @@ ### llama "llama-3.2-1b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-1B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=16), - "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), + "TorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), }, "llama-3.2-3b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-3B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), From 
7d866d81d768752a73ac68c47744c2623d5c58ae Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 15:18:05 -0800 Subject: [PATCH 431/589] removing meta model --- exo/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/models.py b/exo/models.py index ab7977069..a8a482df8 100644 --- a/exo/models.py +++ b/exo/models.py @@ -4,7 +4,7 @@ ### llama "llama-3.2-1b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-1B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=16), - "TorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), + "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), }, "llama-3.2-3b": { "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.2-3B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28), From 253237b1adbeec0b00aa8df79ff8749e1b18bc63 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 15:24:28 -0800 Subject: [PATCH 432/589] updating split model test --- exo/inference/torch/tests/test_split_model.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index 935f74dff..2783e7f09 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -89,18 +89,17 @@ def load_model( # setup the weight range for init_weights shard_num_hidden_layers = shard.end_layer - shard.start_layer print(f"Setting up LLM config with {shard_num_hidden_layers} hidden layers") - llm_config = AutoConfig.from_pretrained( + + # load model with layer edits + # or whole model if no weight_map + print(f"Loading sharded AutoModelForCausalLM from {model_path}") + shard_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=model_path, device_map="cuda", offload_buffers=True, local_files_only=True, num_hidden_layers=shard_num_hidden_layers - ) - - # load model with layer edits - # or whole model if no weight_map - print(f"Loading sharded AutoModelForCausalLM from {model_path}") - shard_model = AutoModelForCausalLM.from_config(llm_config).to(device) + ).to(device) print("Loading tokenizer") tokenizer = AutoTokenizer.from_pretrained( From e46ffa4a213d6e386fa5d692813abcc821cf0b7f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 15:42:20 -0800 Subject: [PATCH 433/589] updating split model test --- exo/inference/torch/tests/test_split_model.py | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index 2783e7f09..4717311cb 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -34,6 +34,28 @@ def load_model( print("load_model called") model_st_snapshot = model_path/"model.safetensors.index.json" + if device: + device = device + elif os.environ.get("TORCH_DEVICE"): + device = torch.device(os.environ["TORCH_DEVICE"]) + elif torch.cuda.is_available(): + device = torch.device("cuda") + elif torch.backends.mps.is_available() and torch.backends.mps.is_built(): + device = torch.device("mps") + else: + device = torch.device("cpu") + + torch.set_default_device(device) + + # setup cude dtype + dtype = torch.get_default_dtype() + + # setup device_map + if os.environ.get("TORCH_DEVICE_MAP"): + 
device_map = os.environ["TORCH_DEVICE_MAP"] + else: + device_map = str(device) + if weight_map: layer_weight_map = {} non_layer_weights = [] @@ -95,7 +117,8 @@ def load_model( print(f"Loading sharded AutoModelForCausalLM from {model_path}") shard_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=model_path, - device_map="cuda", + device_map=device_map, + dtype=dtype, offload_buffers=True, local_files_only=True, num_hidden_layers=shard_num_hidden_layers From 476b6babbad113c6b41de8e3cf0469b4e7f71e21 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 15:49:02 -0800 Subject: [PATCH 434/589] automodel fix --- exo/inference/torch/model/hf.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 78e457c8b..038fb6d1f 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -83,7 +83,7 @@ def __init__( try: if weight_map: print("loading shard model") - self.llm_model_config = self.load_sharded_model( + self.llm_model = self.load_sharded_model( shard, weight_map, offload_buffers=self.offload_buffers @@ -93,8 +93,6 @@ def __init__( # this is needed because shard downloader just # appends and not redownloads the file os.remove(self.model_safetensors_path) - - self.llm_model = AutoModelForCausalLM.from_config(self.llm_model_config).to(self.device) else: print("loading full model") self.llm_model = AutoModelForCausalLM.from_pretrained( @@ -104,8 +102,6 @@ def __init__( offload_buffers=True ).to(self.device) - - self.model = self.llm_model.model.to(self.device) except Exception as err: print(f"error loading and splitting model: {err}") @@ -161,10 +157,11 @@ def load_sharded_model( return AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=self.local_model_path, device_map=self.device_map, + torch_dtype=self.dtype, offload_buffers=offload_buffers, local_files_only=True, num_hidden_layers=shard_num_hidden_layers - ) + ).to(self.device) except Exception as err: print(f"err: {err}") raise From f7e02e9edbf912044b74d632d347808c2a43ca1b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 15:53:42 -0800 Subject: [PATCH 435/589] fixing split model test --- exo/inference/torch/tests/test_split_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index 4717311cb..68e9e95e0 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -118,7 +118,7 @@ def load_model( shard_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=model_path, device_map=device_map, - dtype=dtype, + torch_dtype=dtype, offload_buffers=True, local_files_only=True, num_hidden_layers=shard_num_hidden_layers From bd6322f870fbf9303844c635d4c54e049d745c5b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 16:00:34 -0800 Subject: [PATCH 436/589] pytorch offload buffers error --- exo/inference/torch/tests/test_split_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index 68e9e95e0..bfdbfb498 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -119,7 +119,7 @@ def load_model( pretrained_model_name_or_path=model_path, device_map=device_map, torch_dtype=dtype, - 
offload_buffers=True, + offload_buffers=False, local_files_only=True, num_hidden_layers=shard_num_hidden_layers ).to(device) From c51bd916716ca708cde421e6c0a2bbe9022adacb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 16:08:37 -0800 Subject: [PATCH 437/589] device_map any issue with split model --- exo/inference/torch/tests/test_split_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index bfdbfb498..25a49538a 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -119,10 +119,10 @@ def load_model( pretrained_model_name_or_path=model_path, device_map=device_map, torch_dtype=dtype, - offload_buffers=False, + offload_buffers=True, local_files_only=True, num_hidden_layers=shard_num_hidden_layers - ).to(device) + ) print("Loading tokenizer") tokenizer = AutoTokenizer.from_pretrained( @@ -218,4 +218,4 @@ async def test_split_model( 32 )) except Exception as err: - print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.2-1B-Instruct TEST FAILED \n{err}\n") + print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.1-8B-Instruct TEST FAILED \n{err}\n") From 4a2aef40e375b920622bb9198ce640a698121383 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 16:40:31 -0800 Subject: [PATCH 438/589] updating split model test --- exo/inference/torch/model/hf.py | 12 ++++++--- exo/inference/torch/tests/test_split_model.py | 25 ++++++++----------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 038fb6d1f..1850469bb 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -11,7 +11,6 @@ from exo.inference.torch.utils import extract_layers from transformers import ( - AutoConfig, AutoModelForCausalLM, DynamicCache, Cache, @@ -154,14 +153,21 @@ def load_sharded_model( shard_num_hidden_layers = shard.end_layer - shard.start_layer if DEBUG >= 4: print(f"config with {shard_num_hidden_layers} layers") - return AutoModelForCausalLM.from_pretrained( + + llm_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=self.local_model_path, device_map=self.device_map, torch_dtype=self.dtype, offload_buffers=offload_buffers, local_files_only=True, num_hidden_layers=shard_num_hidden_layers - ).to(self.device) + ) + + if self.device_map == "auto": + return llm_model + else: + return llm_model.to(self.device) + except Exception as err: print(f"err: {err}") raise diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index 25a49538a..183406a53 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -17,14 +17,13 @@ from exo.inference.shard import Shard from exo.inference.torch.utils import print_cuda_vram_stats -from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer +from transformers import AutoModelForCausalLM, AutoTokenizer def load_model( - repo_id: str, shard: Shard, model_path: Path, weight_map: Optional[dict], - device: Optional[str] = "cuda" + device: Optional[torch.device] = torch.device("cpu") ) -> Optional[AutoModelForCausalLM]: """ load model by layer and safetensors @@ -34,16 +33,12 @@ def load_model( print("load_model called") model_st_snapshot = model_path/"model.safetensors.index.json" - if device: - device = device - elif os.environ.get("TORCH_DEVICE"): + if 
os.environ.get("TORCH_DEVICE"): device = torch.device(os.environ["TORCH_DEVICE"]) elif torch.cuda.is_available(): device = torch.device("cuda") elif torch.backends.mps.is_available() and torch.backends.mps.is_built(): device = torch.device("mps") - else: - device = torch.device("cpu") torch.set_default_device(device) @@ -122,7 +117,7 @@ def load_model( offload_buffers=True, local_files_only=True, num_hidden_layers=shard_num_hidden_layers - ) + ).to(device) print("Loading tokenizer") tokenizer = AutoTokenizer.from_pretrained( @@ -159,8 +154,6 @@ def load_model( print(f"Prompt: {prompt}\n") print(f"Response: {response}\n") - print_ram_stats() - # have to clear out edited model safetensors mst_json os.remove(model_st_snapshot) @@ -189,13 +182,15 @@ async def test_split_model( weight_map = await get_weight_map(model_id) load_model( - model_id, shard, model_path, weight_map ) if __name__ == "__main__": + n_layers = int(os.environ["N_LAYERS"]) if os.environ.get("N_LAYERS") else 32 + start_layer = int(os.environ["START_LAYER"]) if os.environ.get("START_LAYER") else 0 + end_layer = int(os.environ["END_LAYER"]) if os.environ.get("END_LAYER") else int(n_layers/2) #Qwen/Qwen2.5-3B #try: # print("\n-------- Test Qwen/Qwen2.5-3B-Instruct ----------\n") @@ -213,9 +208,9 @@ async def test_split_model( print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") asyncio.run(test_split_model( "unsloth/Meta-Llama-3.1-8B-Instruct", - 0, - 6, - 32 + start_layer, + end_layer, + n_layers )) except Exception as err: print(f"\n\n !!!!!!!!!!! meta-llama/Llama-3.1-8B-Instruct TEST FAILED \n{err}\n") From 79f0763d59fcddb20ce024e85f8a8f151621eb2e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 17:10:58 -0800 Subject: [PATCH 439/589] fixing split model issue --- exo/inference/torch/model/hf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 1850469bb..eb6957c3e 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -86,7 +86,7 @@ def __init__( shard, weight_map, offload_buffers=self.offload_buffers - ) + ).to(self.device) # clear out edited safetensor json # this is needed because shard downloader just @@ -311,7 +311,7 @@ def forward( # shard is last layer says true at the start and not detecting last layer correctly if self.shard.is_last_layer(): self.hidden_states = self.model.norm(self.hidden_states) - if use_legacy_cache: + if use_legacy_cache and self.next_decoder_cache is not None: self.past_key_values = self.next_decoder_cache.to_legacy_cache() else: self.past_key_values = self.next_decoder_cache From cbbc9cf1aeeb9fdee496ce4ea4da2d67d44144dd Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 17:16:31 -0800 Subject: [PATCH 440/589] fixing node issues --- exo/inference/torch/inference.py | 1 + exo/inference/torch/model/hf.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/torch/inference.py b/exo/inference/torch/inference.py index 63f29284f..5459664c3 100644 --- a/exo/inference/torch/inference.py +++ b/exo/inference/torch/inference.py @@ -91,6 +91,7 @@ def infer_caching( cached_iids = {"input_ids": past_iids.tolist()} if DEBUG >= 4: + print(f"cached_iids len: {len(cached_iids)}") print(f"cached_iids: {cached_iids}") return (past_iids, cached_iids) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index eb6957c3e..c7153b5d5 100644 --- a/exo/inference/torch/model/hf.py +++ 
b/exo/inference/torch/model/hf.py @@ -98,7 +98,7 @@ def __init__( pretrained_model_name_or_path=self.local_model_path, torch_dtype=self.dtype, device_map=self.device_map, - offload_buffers=True + offload_buffers=offload_buffers ).to(self.device) self.model = self.llm_model.model.to(self.device) From 58cebabd85c8354e570d486a4b31f56b1ce8b1d3 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 18:06:45 -0800 Subject: [PATCH 441/589] fixing node issues --- exo/inference/torch/inference.py | 4 ++++ exo/inference/torch/model/hf.py | 7 ++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/exo/inference/torch/inference.py b/exo/inference/torch/inference.py index 5459664c3..47ccc5992 100644 --- a/exo/inference/torch/inference.py +++ b/exo/inference/torch/inference.py @@ -127,6 +127,10 @@ async def async_forward( attention_mask=attention_mask )) + if DEBUG >=4 : + print("async_forward") + print(f"result: {result}") + return result async def async_logit_sample( diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index c7153b5d5..2dab660c3 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -86,7 +86,7 @@ def __init__( shard, weight_map, offload_buffers=self.offload_buffers - ).to(self.device) + ) # clear out edited safetensor json # this is needed because shard downloader just @@ -163,10 +163,7 @@ def load_sharded_model( num_hidden_layers=shard_num_hidden_layers ) - if self.device_map == "auto": - return llm_model - else: - return llm_model.to(self.device) + return llm_model.to(self.device) except Exception as err: print(f"err: {err}") From 7f9b1bb1833a8d05780ec3a2ce316bbe86662957 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 18:11:10 -0800 Subject: [PATCH 442/589] fixing node issues --- exo/inference/torch/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/torch/inference.py b/exo/inference/torch/inference.py index 47ccc5992..f89f2367f 100644 --- a/exo/inference/torch/inference.py +++ b/exo/inference/torch/inference.py @@ -127,11 +127,11 @@ async def async_forward( attention_mask=attention_mask )) - if DEBUG >=4 : + if DEBUG >=4: print("async_forward") print(f"result: {result}") - return result + return result[0], result[1], result[2] async def async_logit_sample( self, From c3adec5bc2719ffe90e9a1984cb2436a2ce62b65 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 18:24:51 -0800 Subject: [PATCH 443/589] fixing node issues --- exo/inference/torch/inference.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/exo/inference/torch/inference.py b/exo/inference/torch/inference.py index f89f2367f..8c543850e 100644 --- a/exo/inference/torch/inference.py +++ b/exo/inference/torch/inference.py @@ -204,10 +204,14 @@ async def infer_prompt( if DEBUG >= 4: print(f"past_input_ids: {self.past_input_ids}\n") - shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( + shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( input_ids=self.past_input_ids, attention_mask=input_attention_mask ) + #shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( + # input_ids=self.past_input_ids, + # attention_mask=input_attention_mask + #) if DEBUG >= 4: print(f"\nshard_hidden_states: {shard_hidden_states}\n") From c8e6acc49de461b7a3fa9c3699f1ae80e0cb840f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 18:35:39 -0800 Subject: [PATCH 444/589] 
fixing node issues --- exo/inference/torch/inference.py | 6 +--- exo/inference/torch/model/hf.py | 60 ++++++++++++++++---------------- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/exo/inference/torch/inference.py b/exo/inference/torch/inference.py index 8c543850e..f89f2367f 100644 --- a/exo/inference/torch/inference.py +++ b/exo/inference/torch/inference.py @@ -204,14 +204,10 @@ async def infer_prompt( if DEBUG >= 4: print(f"past_input_ids: {self.past_input_ids}\n") - shard_hidden_states, shard_past_kvs, shard_logits = self.stateful_sharded_model.forward( + shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( input_ids=self.past_input_ids, attention_mask=input_attention_mask ) - #shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( - # input_ids=self.past_input_ids, - # attention_mask=input_attention_mask - #) if DEBUG >= 4: print(f"\nshard_hidden_states: {shard_hidden_states}\n") diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 2dab660c3..21d5b3459 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -226,41 +226,41 @@ def forward( print(f"position_ids: {self.position_ids}") print(f"past_key_values: {past_key_values}") - if self.hidden_states is None: - # casual mask and attention_mask - self.attention_mask = attention_mask - self.causal_mask = self.model._update_causal_mask( - None, + #if self.hidden_states is None: + # casual mask and attention_mask + self.attention_mask = attention_mask + self.causal_mask = self.model._update_causal_mask( + None, + self.inputs_embeds, + cache_position, + past_key_values, + False # dont out attentions + ) + + # embed positions, some models require and some dont + if isinstance(self.model, LlamaModel): + self.position_embeddings = self.model.rotary_emb( self.inputs_embeds, - cache_position, - past_key_values, - False # dont out attentions + self.position_ids ) - # embed positions, some models require and some dont - if isinstance(self.model, LlamaModel): - self.position_embeddings = self.model.rotary_emb( - self.inputs_embeds, - self.position_ids - ) - - # prepare inputs for decoder layers - model_inputs = self.llm_model.prepare_inputs_for_generation( - self.input_ids, - past_key_values=past_key_values, - attention_mask=self.attention_mask, - inputs_embeds=self.inputs_embeds, - position_ids=self.position_ids, - cache_position=cache_position - ) + # prepare inputs for decoder layers + model_inputs = self.llm_model.prepare_inputs_for_generation( + self.input_ids, + past_key_values=past_key_values, + attention_mask=self.attention_mask, + inputs_embeds=self.inputs_embeds, + position_ids=self.position_ids, + cache_position=cache_position + ) - self.hidden_states = self.inputs_embeds - self.position_ids = model_inputs["position_ids"] - self.cache_position = model_inputs["cache_position"] - self.past_key_values = model_inputs["past_key_values"] + self.hidden_states = self.inputs_embeds + self.position_ids = model_inputs["position_ids"] + self.cache_position = model_inputs["cache_position"] + self.past_key_values = model_inputs["past_key_values"] - if DEBUG >= 4: - print(f"model_inputs: {model_inputs}") + if DEBUG >= 4: + print(f"model_inputs: {model_inputs}") # run through decoder layers layer_amt = range(self.shard.end_layer - self.shard.start_layer) From df028e2219b7dcda0cae7b983026a65788d790cc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 18:51:19 -0800 Subject: [PATCH 445/589] fixing node issues, 
range issue --- exo/inference/torch/model/hf.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 21d5b3459..636147301 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -62,6 +62,7 @@ def __init__( self.position_ids = None self.causal_mask = None self.local_model_path = local_model_path + self.is_sharded_model = False # setup logit processors self.logits_processor = LogitsProcessorList([ @@ -88,6 +89,8 @@ def __init__( offload_buffers=self.offload_buffers ) + self.is_sharded_model = True + # clear out edited safetensor json # this is needed because shard downloader just # appends and not redownloads the file @@ -263,7 +266,10 @@ def forward( print(f"model_inputs: {model_inputs}") # run through decoder layers - layer_amt = range(self.shard.end_layer - self.shard.start_layer) + if self.is_sharded_model: + layer_amt = range(self.shard.end_layer - self.shard.start_layer) + else: + layer_amt = range(self.shard.start_layer, self.shard.end_layer) if DEBUG >= 4: print(f"hidden_states: {self.hidden_states}") From e5a1939694a332004e30b57f0cfc29a3a3f8e7f3 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 16 Oct 2024 18:55:27 -0800 Subject: [PATCH 446/589] fixing node issues, range issue --- exo/inference/torch/model/hf.py | 56 ++++++++++++++++----------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 636147301..2d543ce55 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -229,38 +229,38 @@ def forward( print(f"position_ids: {self.position_ids}") print(f"past_key_values: {past_key_values}") - #if self.hidden_states is None: - # casual mask and attention_mask - self.attention_mask = attention_mask - self.causal_mask = self.model._update_causal_mask( - None, - self.inputs_embeds, - cache_position, - past_key_values, - False # dont out attentions - ) - - # embed positions, some models require and some dont - if isinstance(self.model, LlamaModel): - self.position_embeddings = self.model.rotary_emb( + if self.hidden_states is None: + # casual mask and attention_mask + self.attention_mask = attention_mask + self.causal_mask = self.model._update_causal_mask( + None, self.inputs_embeds, - self.position_ids + cache_position, + past_key_values, + False # dont out attentions ) - # prepare inputs for decoder layers - model_inputs = self.llm_model.prepare_inputs_for_generation( - self.input_ids, - past_key_values=past_key_values, - attention_mask=self.attention_mask, - inputs_embeds=self.inputs_embeds, - position_ids=self.position_ids, - cache_position=cache_position - ) + # embed positions, some models require and some dont + if isinstance(self.model, LlamaModel): + self.position_embeddings = self.model.rotary_emb( + self.inputs_embeds, + self.position_ids + ) + + # prepare inputs for decoder layers + model_inputs = self.llm_model.prepare_inputs_for_generation( + self.input_ids, + past_key_values=past_key_values, + attention_mask=self.attention_mask, + inputs_embeds=self.inputs_embeds, + position_ids=self.position_ids, + cache_position=cache_position + ) - self.hidden_states = self.inputs_embeds - self.position_ids = model_inputs["position_ids"] - self.cache_position = model_inputs["cache_position"] - self.past_key_values = model_inputs["past_key_values"] + self.hidden_states = self.inputs_embeds + self.position_ids = model_inputs["position_ids"] + 
self.cache_position = model_inputs["cache_position"] + self.past_key_values = model_inputs["past_key_values"] if DEBUG >= 4: print(f"model_inputs: {model_inputs}") From d07b825cb8e953c339671ba92c3920a5bdcdb2a9 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 18 Oct 2024 12:06:29 -0800 Subject: [PATCH 447/589] adding num hidden layers manipulation for all models --- exo/inference/torch/model/hf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 70fee8e55..e0d459f88 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -104,7 +104,8 @@ def __init__( pretrained_model_name_or_path=self.local_model_path, torch_dtype=self.dtype, device_map=self.device_map, - offload_buffers=offload_buffers + offload_buffers=offload_buffers, + num_hidden_layers=int(shard.end_layer - shard.start_layer) ).to(self.device) self.model = self.llm_model.model.to(self.device) From a840e7fc3563677d55dac021fed5cae68d246a5f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 18 Oct 2024 12:08:43 -0800 Subject: [PATCH 448/589] updating to use shard_num_hidden_layers --- exo/inference/torch/model/hf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index e0d459f88..2b9252141 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -99,13 +99,14 @@ def __init__( self.llm_model = AutoModelForCausalLM.from_config(self.llm_model_config).to(self.device) self.model = self.llm_model.model.to(self.device) else: - print("loading full model") + shard_num_hidden_layers = shard.end_layer - shard.start_layer + print(f"loading safetensor in {shard_num_hidden_layers} layer model") self.llm_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=self.local_model_path, torch_dtype=self.dtype, device_map=self.device_map, offload_buffers=offload_buffers, - num_hidden_layers=int(shard.end_layer - shard.start_layer) + num_hidden_layers=shard_num_hidden_layers ).to(self.device) self.model = self.llm_model.model.to(self.device) From 52fa3f877fcb35a7d3dcc18aba3f374c583dab85 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 18 Oct 2024 12:31:30 -0800 Subject: [PATCH 449/589] adding in better layer manipulation --- exo/inference/torch/model/hf.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 2b9252141..a8b6e50f4 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -89,15 +89,12 @@ def __init__( offload_buffers=self.offload_buffers ) - self.is_sharded_model = True + # self.is_sharded_model = True # clear out edited safetensor json # this is needed because shard downloader just # appends and not redownloads the file os.remove(self.model_safetensors_path) - - self.llm_model = AutoModelForCausalLM.from_config(self.llm_model_config).to(self.device) - self.model = self.llm_model.model.to(self.device) else: shard_num_hidden_layers = shard.end_layer - shard.start_layer print(f"loading safetensor in {shard_num_hidden_layers} layer model") @@ -271,10 +268,10 @@ def forward( print(f"model_inputs: {model_inputs}") # run through decoder layers - if self.is_sharded_model: - layer_amt = range(self.shard.end_layer - self.shard.start_layer) - else: - layer_amt = range(self.shard.start_layer, self.shard.end_layer) + # if self.is_sharded_model: + layer_amt = 
range(self.shard.end_layer - self.shard.start_layer) + # else: + # layer_amt = range(self.shard.start_layer, self.shard.end_layer) if DEBUG >= 4: print(f"hidden_states: {self.hidden_states}") From ec49e316c28c272ef71a0f7cfd83d4e4fc576e54 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 18 Oct 2024 16:53:45 -0800 Subject: [PATCH 450/589] adding in safe tensor sharding, generate model.safetensors.index.json from single safetensor, starting safetensor sharding test --- .../torch/model/hf_safe_tensor_shard.py | 89 +++++++++++++ .../torch/tests/test_safetensor_json.py | 120 ++++++++++++++++++ .../torch/tests/test_safetensor_shard.py | 3 + exo/inference/torch/tests/test_split_model.py | 2 - 4 files changed, 212 insertions(+), 2 deletions(-) create mode 100644 exo/inference/torch/model/hf_safe_tensor_shard.py create mode 100644 exo/inference/torch/tests/test_safetensor_json.py create mode 100644 exo/inference/torch/tests/test_safetensor_shard.py diff --git a/exo/inference/torch/model/hf_safe_tensor_shard.py b/exo/inference/torch/model/hf_safe_tensor_shard.py new file mode 100644 index 000000000..caa23e4a0 --- /dev/null +++ b/exo/inference/torch/model/hf_safe_tensor_shard.py @@ -0,0 +1,89 @@ +""" +HuggingFace Safetensor Shard +Sharding of safetensors to only use weights of models needed +""" +import os +import shutil +from safetensors import safe_open +from safetensors.torch import save_file + +class HFSafeTensorShard: + def __init__(self, model_folder, start_layer, end_layer): + self.model_folder = model_folder + self.start_layer = start_layer + self.end_layer = end_layer + self.safetensor_path = self.get_safetensor_path() + self.backup_path = self.safetensor_path + ".backup" + + def get_safetensor_path(self): + try: + for file_name in os.listdir(self.model_folder): + if file_name.endswith(".safetensors"): + return os.path.join(self.model_folder, file_name) + raise FileNotFoundError("No safetensors file found in the provided model folder.") + except Exception as err: + print(f"Error in get_safetensor_path: {err}") + raise + + def backup_safetensor(self): + try: + if not os.path.exists(self.backup_path): + shutil.copy(self.safetensor_path, self.backup_path) + print(f"Backup created at {self.backup_path}") + else: + print("Backup already exists. Skipping backup.") + except Exception as err: + print(f"Error in backup_safetensor: {err}") + raise + + def modify_safetensor(self): + # Ensure the safetensor is backed up before modifying + self.backup_safetensor() + + try: + with safe_open(self.safetensor_path, framework="pt", device="cpu") as f: + metadata = f.metadata() + new_tensors = {} + + # Iterate over tensors, including only those within the specified layer range + for key in f.keys(): + layer_number = self.extract_layer_number(key) + if self.start_layer <= layer_number <= self.end_layer: + new_tensors[key] = f.get_tensor(key) + else: + print(f"Excluding layer {layer_number}: {key}") + + # Save the modified safetensor + save_file(new_tensors, self.safetensor_path, metadata) + print(f"Safetensor modified and saved to {self.safetensor_path}") + except Exception as err: + print(f"Error modifying safetensor: {err}") + raise + + def extract_layer_number(self, key): + """ + Extract the layer number from a tensor key. + This function assumes keys follow the format 'transformer.h..'. 
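# Illustration only (hypothetical helper, not part of the patch above): tensor
# key layouts differ between checkpoints. The docstring above assumes
# 'transformer.h.<n>.<...>' (GPT-style), while a later commit in this series
# parses 'model.layers.<n>.<...>' (Llama-style). A regex that accepts either
# layout and returns -1 for keys that do not belong to a decoder layer:
import re

LAYER_KEY_RE = re.compile(r"(?:model\.layers|transformer\.h)\.(\d+)\.")

def layer_number_of(key: str) -> int:
  match = LAYER_KEY_RE.search(key)
  return int(match.group(1)) if match else -1

# layer_number_of("model.layers.12.self_attn.q_proj.weight") -> 12
# layer_number_of("model.embed_tokens.weight") -> -1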
+ """ + try: + parts = key.split(".") + layer_idx = next(i for i, part in enumerate(parts) if part.startswith("h")) + return int(parts[layer_idx + 1]) + except (IndexError, ValueError) as err: + print(f"Error extracting layer number from key '{key}': {err}") + return -1 + + def restore_backup(self): + """ + Restore the original safetensor from the backup file. + This is useful when you want to reset to the original before making new modifications. + """ + try: + if os.path.exists(self.backup_path): + shutil.copy(self.backup_path, self.safetensor_path) + print(f"Safetensor restored from backup at {self.backup_path}") + else: + print("No backup found. Cannot restore.") + except Exception as err: + print(f"Error in restore_backup: {err}") + raise diff --git a/exo/inference/torch/tests/test_safetensor_json.py b/exo/inference/torch/tests/test_safetensor_json.py new file mode 100644 index 000000000..3ec02c715 --- /dev/null +++ b/exo/inference/torch/tests/test_safetensor_json.py @@ -0,0 +1,120 @@ +""" +Create a model.safetensors.index.json from safetensors +""" +import json +import os + +import asyncio + +from safetensors import safe_open + +from exo.download.hf.hf_shard_download import HFShardDownloader +from exo.inference.shard import Shard + +import torch + +def create_safetensor_index(safetensor_files: list, index_file: str): + """ + Creates a model.safetensors.index.json file from a list of safetensor files. + + Args: + safetensor_files (list): List of paths to the safetensor files. + index_file (str): Path where the index JSON file should be saved. + + Raises: + ValueError: If an unsupported data type is encountered. + """ + if safetensor_files: + # Initialize the metadata and weight_map + metadata = { + "metadata": { + "total_size": 0 + }, + "weight_map": {} + } + + for safetensor_file in safetensor_files: + # Use the safetensor file name as the shard_name + shard_name = os.path.basename(safetensor_file) + + # Open the safetensor file to read the metadata + with safe_open(safetensor_file, framework="pt") as f: + # Get tensor names + tensor_names = f.keys() + + # Collect metadata for each tensor + for name in tensor_names: + tensor_data = f.get_tensor(name) + print(f"tensor_data: {tensor_data}") + shape = tensor_data.shape + dtype = tensor_data.dtype + print(f"shape: {shape}") + print(f"dtype: {str(dtype) == "torch.bfloat16"}") + + # Calculate the tensor size in bytes based on dtype + total_elements = 1 + for dim in shape: + total_elements *= dim + + if dtype == torch.float32: + element_size = 4 + elif dtype == torch.float16 or dtype == torch.bfloat16: + element_size = 2 + # Extend this to support more data types if needed + else: + raise ValueError(f"Unsupported dtype: {dtype}") + + tensor_size = total_elements * element_size + metadata["metadata"]["total_size"] += tensor_size + + # Add to weight_map, mapping the tensor to the shard (file) name + metadata["weight_map"][name] = shard_name + + # Write the metadata and weight map to the index file + with open(index_file, "w") as f: + json.dump(metadata, f, indent=4) + + print(f"Index file created: {index_file}") + else: + print("No safetensor files provided.") + + +async def main(): + """ + Main asynchronous function to download the model shard and create an index file for safetensors. + + This function downloads a model shard from Hugging Face, identifies safetensor files, and + generates a corresponding index file using the `create_safetensor_index` function. 
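# Illustration only (not part of the test above): the per-tensor byte count used
# for "total_size" can also be derived from the tensor itself instead of
# hard-coding sizes per dtype, which covers int8/int32/float64 weights as well.
import torch

def tensor_nbytes(tensor: torch.Tensor) -> int:
  # numel() * element_size() gives the in-memory size in bytes for any dtype
  return tensor.numel() * tensor.element_size()

# e.g. tensor_nbytes(torch.zeros(2, 3, dtype=torch.bfloat16)) -> 12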
+ """ + start_layer = 3 + end_layer = 5 + + # Create a Shard object + shard = Shard( + model_id="meta-llama/Llama-3.2-1B-Instruct", + start_layer=start_layer, + end_layer=end_layer-1, + n_layers=32 + ) + + print(f"Loading shard: {shard}") + shard_downloader = HFShardDownloader() + + # Ensure shard is downloaded + model_path = await shard_downloader.ensure_shard(shard) + + # Collect all safetensor files from the model path + safetensor_files = [ + os.path.join(model_path, file_name) + for file_name in os.listdir(model_path) if file_name.endswith(".safetensors") + ] + + # Create the index file + if safetensor_files: + create_safetensor_index(safetensor_files, os.path.join(model_path, "model.safetensors.index.json")) + else: + print("No safetensor files found in the model path.") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/exo/inference/torch/tests/test_safetensor_shard.py b/exo/inference/torch/tests/test_safetensor_shard.py new file mode 100644 index 000000000..d18e3a954 --- /dev/null +++ b/exo/inference/torch/tests/test_safetensor_shard.py @@ -0,0 +1,3 @@ +""" +Sharding safetensor +""" \ No newline at end of file diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py index 183406a53..197a7c07a 100644 --- a/exo/inference/torch/tests/test_split_model.py +++ b/exo/inference/torch/tests/test_split_model.py @@ -10,8 +10,6 @@ import torch -from transformers.modeling_utils import offload_weight - from exo.download.hf.hf_helpers import get_weight_map from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.shard import Shard From f45b51444bb9bf6c1e7bf2fd853dc4895c8c9e79 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 19 Oct 2024 04:51:29 -0800 Subject: [PATCH 451/589] implementing sharding tests, fixing bugs with safetensor recompile --- exo/inference/torch/model/hf.py | 23 +-- .../torch/model/hf_safe_tensor_shard.py | 190 ++++++++++++++---- .../torch/tests/test_safetensor_shard.py | 42 +++- 3 files changed, 199 insertions(+), 56 deletions(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index a8b6e50f4..1850469bb 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -62,7 +62,6 @@ def __init__( self.position_ids = None self.causal_mask = None self.local_model_path = local_model_path - self.is_sharded_model = False # setup logit processors self.logits_processor = LogitsProcessorList([ @@ -89,21 +88,17 @@ def __init__( offload_buffers=self.offload_buffers ) - # self.is_sharded_model = True - # clear out edited safetensor json # this is needed because shard downloader just # appends and not redownloads the file os.remove(self.model_safetensors_path) else: - shard_num_hidden_layers = shard.end_layer - shard.start_layer - print(f"loading safetensor in {shard_num_hidden_layers} layer model") + print("loading full model") self.llm_model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=self.local_model_path, torch_dtype=self.dtype, device_map=self.device_map, - offload_buffers=offload_buffers, - num_hidden_layers=shard_num_hidden_layers + offload_buffers=True ).to(self.device) self.model = self.llm_model.model.to(self.device) @@ -168,7 +163,10 @@ def load_sharded_model( num_hidden_layers=shard_num_hidden_layers ) - return llm_model.to(self.device) + if self.device_map == "auto": + return llm_model + else: + return llm_model.to(self.device) except Exception as err: print(f"err: {err}") @@ -264,14 +262,11 @@ def 
forward( self.cache_position = model_inputs["cache_position"] self.past_key_values = model_inputs["past_key_values"] - if DEBUG >= 4: - print(f"model_inputs: {model_inputs}") + if DEBUG >= 4: + print(f"model_inputs: {model_inputs}") # run through decoder layers - # if self.is_sharded_model: layer_amt = range(self.shard.end_layer - self.shard.start_layer) - # else: - # layer_amt = range(self.shard.start_layer, self.shard.end_layer) if DEBUG >= 4: print(f"hidden_states: {self.hidden_states}") @@ -316,7 +311,7 @@ def forward( # shard is last layer says true at the start and not detecting last layer correctly if self.shard.is_last_layer(): self.hidden_states = self.model.norm(self.hidden_states) - if use_legacy_cache and self.next_decoder_cache is not None: + if use_legacy_cache: self.past_key_values = self.next_decoder_cache.to_legacy_cache() else: self.past_key_values = self.next_decoder_cache diff --git a/exo/inference/torch/model/hf_safe_tensor_shard.py b/exo/inference/torch/model/hf_safe_tensor_shard.py index caa23e4a0..e0737292d 100644 --- a/exo/inference/torch/model/hf_safe_tensor_shard.py +++ b/exo/inference/torch/model/hf_safe_tensor_shard.py @@ -4,58 +4,72 @@ """ import os import shutil +import json + +from pathlib import Path + from safetensors import safe_open from safetensors.torch import save_file +import torch + +from exo.inference.shard import Shard +from exo.helpers import DEBUG +from exo.inference.torch.utils import extract_layers + class HFSafeTensorShard: - def __init__(self, model_folder, start_layer, end_layer): - self.model_folder = model_folder - self.start_layer = start_layer - self.end_layer = end_layer - self.safetensor_path = self.get_safetensor_path() - self.backup_path = self.safetensor_path + ".backup" - - def get_safetensor_path(self): + def __init__(self, model_path: Path, shard: Shard): + self.model_path = model_path + self.shard = shard + self.safetensors_path = self.get_safetensors() + + def get_safetensors(self) -> list: + safetensors_path = [] try: - for file_name in os.listdir(self.model_folder): + for file_name in os.listdir(self.model_path): if file_name.endswith(".safetensors"): - return os.path.join(self.model_folder, file_name) - raise FileNotFoundError("No safetensors file found in the provided model folder.") + safetensors_path.append(os.path.join(self.model_path, file_name)) except Exception as err: print(f"Error in get_safetensor_path: {err}") raise + return safetensors_path + def backup_safetensor(self): try: - if not os.path.exists(self.backup_path): - shutil.copy(self.safetensor_path, self.backup_path) - print(f"Backup created at {self.backup_path}") - else: - print("Backup already exists. Skipping backup.") + for safetensor_path in self.safetensors_path: + backup_path = safetensor_path+".backup" + if not os.path.exists(backup_path): + shutil.copy(safetensor_path, backup_path) + print(f"Backup created at {backup_path}") + else: + print("Backup already exists. 
Skipping backup.") except Exception as err: print(f"Error in backup_safetensor: {err}") raise def modify_safetensor(self): # Ensure the safetensor is backed up before modifying - self.backup_safetensor() - try: - with safe_open(self.safetensor_path, framework="pt", device="cpu") as f: - metadata = f.metadata() - new_tensors = {} - - # Iterate over tensors, including only those within the specified layer range - for key in f.keys(): - layer_number = self.extract_layer_number(key) - if self.start_layer <= layer_number <= self.end_layer: - new_tensors[key] = f.get_tensor(key) - else: - print(f"Excluding layer {layer_number}: {key}") - - # Save the modified safetensor - save_file(new_tensors, self.safetensor_path, metadata) - print(f"Safetensor modified and saved to {self.safetensor_path}") + self.backup_safetensor() + + for safetensor_path in self.safetensors_path: + with safe_open(safetensor_path, framework="pt") as f: + metadata = f.metadata() + new_tensors = {} + + # Iterate over tensors, including only those within the specified layer range + print(f"\n{f.keys()}\n") + for key in f.keys(): + layer_number = self.extract_layer_number(key) + if self.shard.start_layer <= layer_number <= self.shard.end_layer: + if DEBUG >= 4: + print(f"modify_safetensor [{layer_number}] extracting {key}") + new_tensors[key] = f.get_tensor(key) + + # Save the modified safetensor + save_file(new_tensors, safetensor_path, metadata) + print(f"Safetensor modified and saved to {safetensor_path}") except Exception as err: print(f"Error modifying safetensor: {err}") raise @@ -63,12 +77,16 @@ def modify_safetensor(self): def extract_layer_number(self, key): """ Extract the layer number from a tensor key. - This function assumes keys follow the format 'transformer.h..'. + This function assumes keys follow the format 'model.layers..'. """ try: parts = key.split(".") - layer_idx = next(i for i, part in enumerate(parts) if part.startswith("h")) - return int(parts[layer_idx + 1]) + layer_idx = 0 + if parts[0] == "model" and parts[1] == "layers": + layer_idx = int(parts[2]) + return layer_idx + #layer_idx = next(i for i, part in enumerate(parts) if part.startswith("h")) + #return int(parts[layer_idx + 1]) except (IndexError, ValueError) as err: print(f"Error extracting layer number from key '{key}': {err}") return -1 @@ -79,11 +97,101 @@ def restore_backup(self): This is useful when you want to reset to the original before making new modifications. """ try: - if os.path.exists(self.backup_path): - shutil.copy(self.backup_path, self.safetensor_path) - print(f"Safetensor restored from backup at {self.backup_path}") - else: - print("No backup found. Cannot restore.") + for safetensor_path in self.safetensors_path: + backup_path = safetensor_path+".backup" + if os.path.exists(backup_path): + shutil.copy(backup_path, safetensor_path) + print(f"Safetensor restored from backup at {backup_path}") + else: + print("No backup found. Cannot restore.") except Exception as err: print(f"Error in restore_backup: {err}") raise + + def create_safetensor_index(self): + """ + Creates a model.safetensors.index.json file from a list of safetensor files. 
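# Illustration only -- a condensed sketch of the filtering step above, with a
# hypothetical layer_number_of() callable standing in for extract_layer_number:
from safetensors import safe_open
from safetensors.torch import save_file

def filter_safetensor(path: str, start_layer: int, end_layer: int, layer_number_of) -> bool:
  kept = {}
  with safe_open(path, framework="pt") as f:
    metadata = f.metadata()
    for key in f.keys():
      if start_layer <= layer_number_of(key) <= end_layer:
        kept[key] = f.get_tensor(key)
  if kept:
    # overwrite the file with only the shard's tensors, preserving metadata
    save_file(kept, path, metadata)
    return True
  return False  # caller may delete files that contribute nothing to the shard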
+ + Args: + + Raises: + """ + if self.safetensors_path: + # initialize the metadata and weight_map + metadata = { + "metadata": { + "total_size": 0 + }, + "weight_map": {} + } + + for safetensor_file in self.safetensors_path: + # use the safetensor file name as the shard_name + shard_name = os.path.basename(safetensor_file) + + # open the safetensor file to read the metadata + with safe_open(safetensor_file, framework="pt") as f: + # get tensor names + tensor_names = f.keys() + + # collect metadata for each tensor + for name in tensor_names: + tensor_data = f.get_tensor(name) + print(f"tensor_data: {tensor_data}") + shape = tensor_data.shape + dtype = tensor_data.dtype + print(f"shape: {shape}") + print(f"dtype: {str(dtype) == "torch.bfloat16"}") + + # calculate the tensor size in bytes based on dtype + total_elements = 1 + for dim in shape: + total_elements *= dim + + if dtype == torch.float32: + element_size = 4 + elif dtype == torch.float16 or dtype == torch.bfloat16: + element_size = 2 + # extend this to support more data types if needed + else: + raise ValueError(f"unsupported dtype: {dtype}") + + tensor_size = total_elements * element_size + metadata["metadata"]["total_size"] += tensor_size + + # add to weight_map, mapping the tensor to the shard (file) name + metadata["weight_map"][name] = shard_name + + # write the metadata and weight map to the index file + with open(f"{self.model_path}/model.safetensors.index.json", "w") as f: + json.dump(metadata, f, indent=4) + + print("model.safetensors.index.json created") + else: + print("No safetensor files provided.") + + def shard_safetensor_index(self, weight_map): + layer_weight_map = extract_layers( + weight_map, + self.shard + ) + + # rewrite model.safetensors.index.json for only needed layers + try: + mst_json = {} + for safetensor_path in self.safetensors_path: + with open(safetensor_path, "r") as mst_file: + mst_json = json.load(mst_file) + mst_json["weight_map"] = layer_weight_map + + if DEBUG >= 4: + print(f"rewritten safetensor index \n{json.dumps(mst_json, indent=4)}") + + os.remove(safetensor_path) + + with open(safetensor_path, "w") as mst_file: + json.dump(mst_json, mst_file, indent=4) + except Exception as err: + print(f"err: {err}") + raise + diff --git a/exo/inference/torch/tests/test_safetensor_shard.py b/exo/inference/torch/tests/test_safetensor_shard.py index d18e3a954..148212487 100644 --- a/exo/inference/torch/tests/test_safetensor_shard.py +++ b/exo/inference/torch/tests/test_safetensor_shard.py @@ -1,3 +1,43 @@ """ Sharding safetensor -""" \ No newline at end of file +""" + +import asyncio + +from exo.inference.shard import Shard +from exo.inference.torch.model.hf_safe_tensor_shard import HFSafeTensorShard +from exo.download.hf.hf_shard_download import HFShardDownloader +from exo.download.hf.hf_helpers import get_weight_map + + +async def main(): + start_layer = 3 + end_layer = 5 + + # Create a Shard object + shard = Shard( + model_id="meta-llama/Llama-3.2-1B-Instruct", + start_layer=start_layer, + end_layer=end_layer-1, + n_layers=32 + ) + + print(f"Loading shard: {shard}") + shard_downloader = HFShardDownloader() + + # Ensure shard is downloaded + model_path = await shard_downloader.ensure_shard(shard) + + # weight map, if any + model_wm = await get_weight_map( + repo_id=shard.model_id + ) + + tensor_shard = HFSafeTensorShard(model_path, shard) + tensor_shard.modify_safetensor() + tensor_shard.create_safetensor_index() + tensor_shard.restore_backup() + + +if __name__ == "__main__": + asyncio.run(main()) From 
f90c24a2b67c4aa68493e2be8470c7be304af716 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 20 Oct 2024 04:03:33 -0800 Subject: [PATCH 452/589] adding safetensor sharding, implementing it into model inference engine --- exo/inference/torch/model/hf.py | 118 +++++++----------- .../torch/model/hf_safe_tensor_shard.py | 102 +++++++++------ .../torch/tests/test_inference_engine.py | 2 +- .../torch/tests/test_safetensor_shard.py | 30 ++++- .../torch/tests/test_simple_model.py | 83 ++++++------ 5 files changed, 176 insertions(+), 159 deletions(-) diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py index 1850469bb..f15d5d19f 100644 --- a/exo/inference/torch/model/hf.py +++ b/exo/inference/torch/model/hf.py @@ -9,6 +9,7 @@ from exo.inference.shard import Shard from exo.helpers import DEBUG from exo.inference.torch.utils import extract_layers +from exo.inference.torch.model.hf_safe_tensor_shard import HFSafeTensorShard from transformers import ( AutoModelForCausalLM, @@ -52,17 +53,17 @@ def __init__( # class vars self.shard = shard - self.hidden_states = None - self.input_ids = None - self.inputs_embeds = None - self.attention_mask = None - self.position_embeddings = None - self.past_key_values = None - self.cache_position = None - self.position_ids = None - self.causal_mask = None self.local_model_path = local_model_path - + self.weight_map = weight_map + self.device = device + self.dtype = dtype + self.device_map = device_map + self.offload_buffers = offload_buffers + self.model_safetensors_path = self.local_model_path/"model.safetensors.index.json" + self.safetensor_sharder = HFSafeTensorShard( + self.local_model_path, + self.shard + ) # setup logit processors self.logits_processor = LogitsProcessorList([ TopKLogitsWarper(top_k), @@ -70,87 +71,47 @@ def __init__( TopPLogitsWarper(top_p) ]) - self.device = device - self.dtype = dtype - self.device_map = device_map - - self.offload_buffers = offload_buffers - - self.model_safetensors_path = self.local_model_path/"model.safetensors.index.json" - - # setup pytorch and transformer llm + # setup sharded llm try: - if weight_map: - print("loading shard model") - self.llm_model = self.load_sharded_model( - shard, - weight_map, - offload_buffers=self.offload_buffers - ) - - # clear out edited safetensor json - # this is needed because shard downloader just - # appends and not redownloads the file - os.remove(self.model_safetensors_path) - else: - print("loading full model") - self.llm_model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path=self.local_model_path, - torch_dtype=self.dtype, - device_map=self.device_map, - offload_buffers=True - ).to(self.device) - + self.llm_model = self.load_sharded_model() self.model = self.llm_model.model.to(self.device) + + # restore originals for next run, if one + self.safetensor_sharder.restore_backups() except Exception as err: - print(f"error loading and splitting model: {err}") + print(f"error loading and sharding model: {err}") raise - def load_sharded_model( - self, - shard: Shard, - weight_map: dict, - offload_buffers: bool - ) -> AutoModelForCausalLM: + # forward variables + self.hidden_states = None + self.input_ids = None + self.inputs_embeds = None + self.attention_mask = None + self.position_embeddings = None + self.past_key_values = None + self.cache_position = None + self.position_ids = None + self.causal_mask = None + + def load_sharded_model(self) -> AutoModelForCausalLM: """ Loads sharded version of model where only needed weights are loaded for 
necessary layers - Args: - Returns: + llm_model (AutoModelForCausalLM) - sharded llm model with only needed layers loaded """ if DEBUG >= 4: print("load_sharded_model called") - print(f"shard: {shard}") - - # break out layers per shard range - layer_weight_map = extract_layers( - weight_map, - shard - ) - # rewrite model.safetensors.index.json for only needed layers - try: - mst_json = {} - with open(self.model_safetensors_path, "r") as mst_file: - mst_json = json.load(mst_file) - mst_json["weight_map"] = layer_weight_map - - if DEBUG >= 4: - print(f"rewritten safetensor index \n{json.dumps(mst_json, indent=4)}") - - os.remove(self.model_safetensors_path) - - with open(self.model_safetensors_path, "w") as mst_file: - json.dump(mst_json, mst_file, indent=4) - except Exception as err: - print(f"err: {err}") - raise + # modify safetensor + self.safetensor_sharder.modify_safetensor() + self.safetensor_sharder.create_safetensor_index() + self.safetensor_sharder.shard_safetensor_index(self.weight_map) # load model try: - shard_num_hidden_layers = shard.end_layer - shard.start_layer + shard_num_hidden_layers = (self.shard.end_layer - self.shard.start_layer) + 1 if DEBUG >= 4: print(f"config with {shard_num_hidden_layers} layers") @@ -158,11 +119,16 @@ def load_sharded_model( pretrained_model_name_or_path=self.local_model_path, device_map=self.device_map, torch_dtype=self.dtype, - offload_buffers=offload_buffers, + offload_buffers=self.offload_buffers, local_files_only=True, - num_hidden_layers=shard_num_hidden_layers + num_hidden_layers=shard_num_hidden_layers, + use_safetensors=True, + low_cpu_mem_usage=True ) + # restore backup for next run + self.safetensor_sharder.restore_backups() + if self.device_map == "auto": return llm_model else: diff --git a/exo/inference/torch/model/hf_safe_tensor_shard.py b/exo/inference/torch/model/hf_safe_tensor_shard.py index e0737292d..95162d3f3 100644 --- a/exo/inference/torch/model/hf_safe_tensor_shard.py +++ b/exo/inference/torch/model/hf_safe_tensor_shard.py @@ -19,9 +19,10 @@ class HFSafeTensorShard: def __init__(self, model_path: Path, shard: Shard): - self.model_path = model_path + self.model_path = model_path self.shard = shard self.safetensors_path = self.get_safetensors() + self.safetensor_index_path = f"{self.model_path}/model.safetensors.index.json" def get_safetensors(self) -> list: safetensors_path = [] @@ -41,7 +42,7 @@ def backup_safetensor(self): backup_path = safetensor_path+".backup" if not os.path.exists(backup_path): shutil.copy(safetensor_path, backup_path) - print(f"Backup created at {backup_path}") + print(f"Backup created at {backup_path}") else: print("Backup already exists. 
Skipping backup.") except Exception as err: @@ -49,27 +50,40 @@ def backup_safetensor(self): raise def modify_safetensor(self): - # Ensure the safetensor is backed up before modifying + """ + Extract needed weights for layers from safetensor files + and create a new safetensor with same names + """ try: self.backup_safetensor() - + safetensor_is_used = False for safetensor_path in self.safetensors_path: + initial_size = os.path.getsize(safetensor_path) with safe_open(safetensor_path, framework="pt") as f: metadata = f.metadata() new_tensors = {} # Iterate over tensors, including only those within the specified layer range - print(f"\n{f.keys()}\n") for key in f.keys(): layer_number = self.extract_layer_number(key) if self.shard.start_layer <= layer_number <= self.shard.end_layer: if DEBUG >= 4: print(f"modify_safetensor [{layer_number}] extracting {key}") new_tensors[key] = f.get_tensor(key) - + safetensor_is_used = True + # Save the modified safetensor - save_file(new_tensors, safetensor_path, metadata) - print(f"Safetensor modified and saved to {safetensor_path}") + if safetensor_is_used: + save_file(new_tensors, safetensor_path, metadata) + modified_size = os.path.getsize(safetensor_path) + + print(f"Safetensor modified and saved to {safetensor_path}") + print(f"Initial size: {initial_size / (1024**3):.2f} GB") + print(f"Modified size: {modified_size / (1024**3):.2f} GB") + else: + # remove unused safetensors + os.remove(safetensor_path) + print(f"Removed safetensor: {safetensor_path}") except Exception as err: print(f"Error modifying safetensor: {err}") raise @@ -91,23 +105,6 @@ def extract_layer_number(self, key): print(f"Error extracting layer number from key '{key}': {err}") return -1 - def restore_backup(self): - """ - Restore the original safetensor from the backup file. - This is useful when you want to reset to the original before making new modifications. - """ - try: - for safetensor_path in self.safetensors_path: - backup_path = safetensor_path+".backup" - if os.path.exists(backup_path): - shutil.copy(backup_path, safetensor_path) - print(f"Safetensor restored from backup at {backup_path}") - else: - print("No backup found. Cannot restore.") - except Exception as err: - print(f"Error in restore_backup: {err}") - raise - def create_safetensor_index(self): """ Creates a model.safetensors.index.json file from a list of safetensor files. 
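# Illustration only (values invented for the example): the index file written
# here follows the standard Hugging Face "model.safetensors.index.json" layout,
# mapping each tensor name to the shard file that stores it, with total_size
# equal to the sum of the per-tensor byte counts.
example_index = {
  "metadata": {"total_size": 2471645608},
  "weight_map": {
    "model.embed_tokens.weight": "model.safetensors",
    "model.layers.0.self_attn.q_proj.weight": "model.safetensors",
    "model.layers.0.mlp.gate_proj.weight": "model.safetensors",
  },
}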
@@ -116,6 +113,12 @@ def create_safetensor_index(self): Raises: """ + if os.path.exists(self.safetensor_index_path): + backup_index_path = f"{self.model_path}/model.safetensors.index.json.backup" + if not os.path.exists(backup_index_path): + shutil.copy(self.safetensor_index_path, backup_index_path) + print(f"backed up index json {self.safetensor_index_path}") + if self.safetensors_path: # initialize the metadata and weight_map metadata = { @@ -130,18 +133,15 @@ def create_safetensor_index(self): shard_name = os.path.basename(safetensor_file) # open the safetensor file to read the metadata - with safe_open(safetensor_file, framework="pt") as f: + with safe_open(safetensor_file, framework="pt", device="cpu") as f: # get tensor names tensor_names = f.keys() # collect metadata for each tensor for name in tensor_names: tensor_data = f.get_tensor(name) - print(f"tensor_data: {tensor_data}") shape = tensor_data.shape dtype = tensor_data.dtype - print(f"shape: {shape}") - print(f"dtype: {str(dtype) == "torch.bfloat16"}") # calculate the tensor size in bytes based on dtype total_elements = 1 @@ -163,7 +163,7 @@ def create_safetensor_index(self): metadata["weight_map"][name] = shard_name # write the metadata and weight map to the index file - with open(f"{self.model_path}/model.safetensors.index.json", "w") as f: + with open(self.safetensor_index_path, "w") as f: json.dump(metadata, f, indent=4) print("model.safetensors.index.json created") @@ -179,19 +179,41 @@ def shard_safetensor_index(self, weight_map): # rewrite model.safetensors.index.json for only needed layers try: mst_json = {} - for safetensor_path in self.safetensors_path: - with open(safetensor_path, "r") as mst_file: - mst_json = json.load(mst_file) - mst_json["weight_map"] = layer_weight_map + with open(self.safetensor_index_path, "r") as mst_file: + mst_json = json.load(mst_file) + mst_json["weight_map"] = layer_weight_map - if DEBUG >= 4: - print(f"rewritten safetensor index \n{json.dumps(mst_json, indent=4)}") + if DEBUG >= 4: + print(f"new safetensor index\n{json.dumps(mst_json, indent=4)}\n") - os.remove(safetensor_path) + os.remove(self.safetensor_index_path) - with open(safetensor_path, "w") as mst_file: - json.dump(mst_json, mst_file, indent=4) + with open(self.safetensor_index_path, "w") as mst_file: + json.dump(mst_json, mst_file, indent=4) except Exception as err: print(f"err: {err}") raise - + + def restore_backups(self): + """ + Restore the original safetensor and index json, if any, from the backup file. + """ + try: + for safetensor_path in self.safetensors_path: + backup_path = safetensor_path+".backup" + if os.path.exists(backup_path): + shutil.copy(backup_path, safetensor_path) + print(f"Safetensor restored from backup at {backup_path}") + else: + print("No backup found. Cannot restore.") + + backup_index_path = self.safetensor_index_path+".backup" + if os.path.exists(backup_index_path): + shutil.copy(backup_index_path, self.safetensor_index_path) + print(f"Safetensor index JSON restored from backup at {backup_index_path}") + else: + print("No backup found. 
Cannot restore") + except Exception as err: + print(f"Error in restore_backup: {err}") + raise + diff --git a/exo/inference/torch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_inference_engine.py index 2d24c8b28..1594551c8 100644 --- a/exo/inference/torch/tests/test_inference_engine.py +++ b/exo/inference/torch/tests/test_inference_engine.py @@ -22,7 +22,7 @@ async def test_inference_engine( shard = Shard( model_id=model_id, start_layer=0, - end_layer=n_layers-1, + end_layer=0, n_layers=n_layers ) diff --git a/exo/inference/torch/tests/test_safetensor_shard.py b/exo/inference/torch/tests/test_safetensor_shard.py index 148212487..88c12ec01 100644 --- a/exo/inference/torch/tests/test_safetensor_shard.py +++ b/exo/inference/torch/tests/test_safetensor_shard.py @@ -9,10 +9,11 @@ from exo.download.hf.hf_shard_download import HFShardDownloader from exo.download.hf.hf_helpers import get_weight_map +from transformers import AutoModelForCausalLM, AutoTokenizer async def main(): - start_layer = 3 - end_layer = 5 + start_layer = 0 + end_layer = 1 # Create a Shard object shard = Shard( @@ -36,8 +37,31 @@ async def main(): tensor_shard = HFSafeTensorShard(model_path, shard) tensor_shard.modify_safetensor() tensor_shard.create_safetensor_index() - tensor_shard.restore_backup() + # load model and test + model = AutoModelForCausalLM.from_pretrained( + pretrained_model_name_or_path=shard.model_id, + local_files_only=True, + num_hidden_layers=shard.end_layer - shard.start_layer + ).to("cuda") + + tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") + + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "In one simple word, what is the color of a red apple?"} + ] + + text = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + model_inputs = tokenizer([text], return_tensors="pt") + + print(f"model_inputs:\n{model_inputs}") + + tensor_shard.restore_backup() if __name__ == "__main__": asyncio.run(main()) diff --git a/exo/inference/torch/tests/test_simple_model.py b/exo/inference/torch/tests/test_simple_model.py index 2a36717f7..5ffd30ef9 100644 --- a/exo/inference/torch/tests/test_simple_model.py +++ b/exo/inference/torch/tests/test_simple_model.py @@ -4,42 +4,47 @@ """ from transformers import AutoModelForCausalLM, AutoTokenizer -model = AutoModelForCausalLM.from_pretrained( - "Qwen/Qwen2-0.5B-Instruct", - torch_dtype="auto", - device_map="auto" -) -tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") - -prompt = "In a single word only, what is the last name of the current president of the USA?" 
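# Illustration only: shard bounds in this series are inclusive (the tests build
# Shard(start_layer=3, end_layer=end_layer-1)), so a shard owns
# end_layer - start_layer + 1 decoder layers -- the "+ 1" PATCH 452 adds when it
# passes num_hidden_layers to from_pretrained (transformers forwards unknown
# kwargs such as num_hidden_layers to the model config, so only that many
# decoder layers get instantiated).
def shard_layer_count(start_layer: int, end_layer: int) -> int:
  return (end_layer - start_layer) + 1

# shard_layer_count(0, 0) -> 1 (a single layer); shard_layer_count(3, 4) -> 2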
- -messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": prompt} -] -text = tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True -) -model_inputs = tokenizer([text], return_tensors="pt") - -print(f"model_inputs:\n{model_inputs}") - -print(f"generation_config:\n{model.generation_config}") - -generated_ids = model.generate( - model_inputs.input_ids, - attention_mask=model_inputs.attention_mask, - max_new_tokens=512, - do_sample=True -) - -generated_ids = [ - output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids) -] - -response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] - -print(f"Prompt: {prompt}\n") -print(f"Response: {response}\n") +def run_simple(prompt: str): + model = AutoModelForCausalLM.from_pretrained( + "Qwen/Qwen2-0.5B-Instruct", + torch_dtype="auto", + device_map="auto" + ) + + tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") + + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": prompt} + ] + text = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + model_inputs = tokenizer([text], return_tensors="pt") + + print(f"model_inputs:\n{model_inputs}") + + print(f"generation_config:\n{model.generation_config}") + + generated_ids = model.generate( + model_inputs.input_ids, + attention_mask=model_inputs.attention_mask, + max_new_tokens=512, + do_sample=True + ) + + generated_ids = [ + output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids) + ] + + response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] + + print(f"Prompt: {prompt}\n") + print(f"Response: {response}\n") + +if __name__ == "__main__": + run_simple( + "In a single word only, what is the last name of the current president of the USA?" 
+ ) From 696c264d45e36b832309bc9e61e08e2fcaf94ae1 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 20 Oct 2024 04:05:12 -0800 Subject: [PATCH 453/589] updating backup and backup restore --- exo/inference/torch/model/hf_safe_tensor_shard.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exo/inference/torch/model/hf_safe_tensor_shard.py b/exo/inference/torch/model/hf_safe_tensor_shard.py index 95162d3f3..052fafb06 100644 --- a/exo/inference/torch/model/hf_safe_tensor_shard.py +++ b/exo/inference/torch/model/hf_safe_tensor_shard.py @@ -202,6 +202,7 @@ def restore_backups(self): for safetensor_path in self.safetensors_path: backup_path = safetensor_path+".backup" if os.path.exists(backup_path): + os.remove(safetensor_path) shutil.copy(backup_path, safetensor_path) print(f"Safetensor restored from backup at {backup_path}") else: @@ -209,6 +210,7 @@ def restore_backups(self): backup_index_path = self.safetensor_index_path+".backup" if os.path.exists(backup_index_path): + os.remove(self.safetensor_index_path) shutil.copy(backup_index_path, self.safetensor_index_path) print(f"Safetensor index JSON restored from backup at {backup_index_path}") else: From 9514e922f73250c5df37ccd29f61ee88d37e9d8c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 20 Oct 2024 04:12:05 -0800 Subject: [PATCH 454/589] added removing backup when restoring --- exo/inference/torch/model/hf_safe_tensor_shard.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exo/inference/torch/model/hf_safe_tensor_shard.py b/exo/inference/torch/model/hf_safe_tensor_shard.py index 052fafb06..2dc6c7ff4 100644 --- a/exo/inference/torch/model/hf_safe_tensor_shard.py +++ b/exo/inference/torch/model/hf_safe_tensor_shard.py @@ -204,6 +204,7 @@ def restore_backups(self): if os.path.exists(backup_path): os.remove(safetensor_path) shutil.copy(backup_path, safetensor_path) + os.remove(backup_path) print(f"Safetensor restored from backup at {backup_path}") else: print("No backup found. Cannot restore.") @@ -212,6 +213,7 @@ def restore_backups(self): if os.path.exists(backup_index_path): os.remove(self.safetensor_index_path) shutil.copy(backup_index_path, self.safetensor_index_path) + os.remove(backup_index_path) print(f"Safetensor index JSON restored from backup at {backup_index_path}") else: print("No backup found. 
Cannot restore") From d65505ee8b59c7d5b14ee301c9655b7cfcdefb97 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 20 Oct 2024 04:45:50 -0800 Subject: [PATCH 455/589] added generating weight map if none, did updates to backup and restore process for sharding HF safetensors --- .../torch/model/hf_safe_tensor_shard.py | 38 ++++++++++--------- .../torch/tests/test_inference_engine.py | 30 +++++++-------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/exo/inference/torch/model/hf_safe_tensor_shard.py b/exo/inference/torch/model/hf_safe_tensor_shard.py index 2dc6c7ff4..537e73d56 100644 --- a/exo/inference/torch/model/hf_safe_tensor_shard.py +++ b/exo/inference/torch/model/hf_safe_tensor_shard.py @@ -6,6 +6,7 @@ import shutil import json +from typing import Optional from pathlib import Path from safetensors import safe_open @@ -23,13 +24,21 @@ def __init__(self, model_path: Path, shard: Shard): self.shard = shard self.safetensors_path = self.get_safetensors() self.safetensor_index_path = f"{self.model_path}/model.safetensors.index.json" + self.metadata = { + "metadata": { + "total_size": 0 + }, + "weight_map": {} + } def get_safetensors(self) -> list: safetensors_path = [] try: for file_name in os.listdir(self.model_path): if file_name.endswith(".safetensors"): - safetensors_path.append(os.path.join(self.model_path, file_name)) + safetensor_path = os.path.join(self.model_path, file_name) + if safetensor_path not in safetensors_path: + safetensors_path.append(safetensor_path) except Exception as err: print(f"Error in get_safetensor_path: {err}") raise @@ -42,7 +51,7 @@ def backup_safetensor(self): backup_path = safetensor_path+".backup" if not os.path.exists(backup_path): shutil.copy(safetensor_path, backup_path) - print(f"Backup created at {backup_path}") + print(f"Backup created at {backup_path}") else: print("Backup already exists. Skipping backup.") except Exception as err: @@ -120,14 +129,7 @@ def create_safetensor_index(self): print(f"backed up index json {self.safetensor_index_path}") if self.safetensors_path: - # initialize the metadata and weight_map - metadata = { - "metadata": { - "total_size": 0 - }, - "weight_map": {} - } - + # initialize the metadata and weight_map for safetensor_file in self.safetensors_path: # use the safetensor file name as the shard_name shard_name = os.path.basename(safetensor_file) @@ -157,20 +159,24 @@ def create_safetensor_index(self): raise ValueError(f"unsupported dtype: {dtype}") tensor_size = total_elements * element_size - metadata["metadata"]["total_size"] += tensor_size + self.metadata["metadata"]["total_size"] += tensor_size # add to weight_map, mapping the tensor to the shard (file) name - metadata["weight_map"][name] = shard_name + self.metadata["weight_map"][name] = shard_name # write the metadata and weight map to the index file with open(self.safetensor_index_path, "w") as f: - json.dump(metadata, f, indent=4) + json.dump(self.metadata, f, indent=4) print("model.safetensors.index.json created") else: print("No safetensor files provided.") - def shard_safetensor_index(self, weight_map): + def shard_safetensor_index(self, weight_map: Optional[dict] = None): + if weight_map is None: + weight_map = self.metadata["weight_map"] + + print(f"shard\n{weight_map}") layer_weight_map = extract_layers( weight_map, self.shard @@ -206,8 +212,6 @@ def restore_backups(self): shutil.copy(backup_path, safetensor_path) os.remove(backup_path) print(f"Safetensor restored from backup at {backup_path}") - else: - print("No backup found. 
Cannot restore.") backup_index_path = self.safetensor_index_path+".backup" if os.path.exists(backup_index_path): @@ -215,8 +219,6 @@ def restore_backups(self): shutil.copy(backup_index_path, self.safetensor_index_path) os.remove(backup_index_path) print(f"Safetensor index JSON restored from backup at {backup_index_path}") - else: - print("No backup found. Cannot restore") except Exception as err: print(f"Error in restore_backup: {err}") raise diff --git a/exo/inference/torch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_inference_engine.py index 1594551c8..2b72b8592 100644 --- a/exo/inference/torch/tests/test_inference_engine.py +++ b/exo/inference/torch/tests/test_inference_engine.py @@ -116,26 +116,26 @@ async def test_inference_engine( assert np.array_equal(next_resp_full, resp4) if __name__ == '__main__': - #try: - # print("\n\n -------- TEST Qwen/Qwen2.5-3B-Instruct -------- \n\n") - # asyncio.run(test_inference_engine( - # TorchDynamicShardInferenceEngine(HFShardDownloader()), - # TorchDynamicShardInferenceEngine(HFShardDownloader()), - # "Qwen/Qwen2.5-3B-Instruct", - # 36 - # )) - #except Exception as err: - # print(f"\n!!!! QWEN2 TEST FAILED \n{err}\n") - try: - print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") + print("\n\n -------- TEST Qwen/Qwen2-0.5B-Instruct -------- \n\n") asyncio.run(test_inference_engine( TorchDynamicShardInferenceEngine(HFShardDownloader()), TorchDynamicShardInferenceEngine(HFShardDownloader()), - "unsloth/Meta-Llama-3.1-8B-Instruct", - 32 + "Qwen/Qwen2-0.5B-Instruct", + 36 )) except Exception as err: - print(f"\n!!!! unsloth/Meta-Llama-3.1-8B-Instruct TEST FAILED \n{err}\n") + print(f"\n!!!! QWEN2 TEST FAILED \n{err}\n") + + #try: + # print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") + # asyncio.run(test_inference_engine( + # TorchDynamicShardInferenceEngine(HFShardDownloader()), + # TorchDynamicShardInferenceEngine(HFShardDownloader()), + # "unsloth/Meta-Llama-3.1-8B-Instruct", + # 32 + # )) + #except Exception as err: + # print(f"\n!!!! unsloth/Meta-Llama-3.1-8B-Instruct TEST FAILED \n{err}\n") From d5b61131c79b2bab01685913e4838c1743b0b959 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 20 Oct 2024 04:56:50 -0800 Subject: [PATCH 456/589] cleaning up logging --- .../torch/model/hf_safe_tensor_shard.py | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/exo/inference/torch/model/hf_safe_tensor_shard.py b/exo/inference/torch/model/hf_safe_tensor_shard.py index 537e73d56..34c9c411e 100644 --- a/exo/inference/torch/model/hf_safe_tensor_shard.py +++ b/exo/inference/torch/model/hf_safe_tensor_shard.py @@ -51,9 +51,9 @@ def backup_safetensor(self): backup_path = safetensor_path+".backup" if not os.path.exists(backup_path): shutil.copy(safetensor_path, backup_path) - print(f"Backup created at {backup_path}") - else: - print("Backup already exists. 
Skipping backup.") + + if DEBUG >= 4: + print(f"Backup created at {backup_path}") except Exception as err: print(f"Error in backup_safetensor: {err}") raise @@ -80,19 +80,22 @@ def modify_safetensor(self): print(f"modify_safetensor [{layer_number}] extracting {key}") new_tensors[key] = f.get_tensor(key) safetensor_is_used = True - + # Save the modified safetensor if safetensor_is_used: save_file(new_tensors, safetensor_path, metadata) modified_size = os.path.getsize(safetensor_path) - print(f"Safetensor modified and saved to {safetensor_path}") - print(f"Initial size: {initial_size / (1024**3):.2f} GB") - print(f"Modified size: {modified_size / (1024**3):.2f} GB") + if DEBUG >= 4: + print(f"Safetensor modified and saved to {safetensor_path}") + print(f"Initial size: {initial_size / (1024**3):.2f} GB") + print(f"Modified size: {modified_size / (1024**3):.2f} GB") else: # remove unused safetensors os.remove(safetensor_path) - print(f"Removed safetensor: {safetensor_path}") + + if DEBUG >= 4: + print(f"Removed safetensor: {safetensor_path}") except Exception as err: print(f"Error modifying safetensor: {err}") raise @@ -126,7 +129,9 @@ def create_safetensor_index(self): backup_index_path = f"{self.model_path}/model.safetensors.index.json.backup" if not os.path.exists(backup_index_path): shutil.copy(self.safetensor_index_path, backup_index_path) - print(f"backed up index json {self.safetensor_index_path}") + + if DEBUG >= 4: + print(f"backed up index json {self.safetensor_index_path}") if self.safetensors_path: # initialize the metadata and weight_map @@ -168,7 +173,8 @@ def create_safetensor_index(self): with open(self.safetensor_index_path, "w") as f: json.dump(self.metadata, f, indent=4) - print("model.safetensors.index.json created") + if DEBUG >= 4: + print(f"created new {self.safetensor_index_path}") else: print("No safetensor files provided.") @@ -176,7 +182,6 @@ def shard_safetensor_index(self, weight_map: Optional[dict] = None): if weight_map is None: weight_map = self.metadata["weight_map"] - print(f"shard\n{weight_map}") layer_weight_map = extract_layers( weight_map, self.shard @@ -211,14 +216,18 @@ def restore_backups(self): os.remove(safetensor_path) shutil.copy(backup_path, safetensor_path) os.remove(backup_path) - print(f"Safetensor restored from backup at {backup_path}") + + if DEBUG >= 4: + print(f"Safetensor restored from backup at {backup_path}") backup_index_path = self.safetensor_index_path+".backup" if os.path.exists(backup_index_path): os.remove(self.safetensor_index_path) shutil.copy(backup_index_path, self.safetensor_index_path) os.remove(backup_index_path) - print(f"Safetensor index JSON restored from backup at {backup_index_path}") + + if DEBUG >= 4: + print(f"Safetensor index JSON restored from backup at {backup_index_path}") except Exception as err: print(f"Error in restore_backup: {err}") raise From d2302ccd424e072d84cab0ad9be88ff91b6f2a58 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 20 Oct 2024 05:04:51 -0800 Subject: [PATCH 457/589] updating docstring in newest class file --- .../torch/model/hf_safe_tensor_shard.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/exo/inference/torch/model/hf_safe_tensor_shard.py b/exo/inference/torch/model/hf_safe_tensor_shard.py index 34c9c411e..c3afdea52 100644 --- a/exo/inference/torch/model/hf_safe_tensor_shard.py +++ b/exo/inference/torch/model/hf_safe_tensor_shard.py @@ -32,6 +32,12 @@ def __init__(self, model_path: Path, shard: Shard): } def get_safetensors(self) -> list: + """ 
+ Gets a list of all files that have the extension .safetensors + + Return: + list: A list of all the safetensors file paths + """ safetensors_path = [] try: for file_name in os.listdir(self.model_path): @@ -120,10 +126,6 @@ def extract_layer_number(self, key): def create_safetensor_index(self): """ Creates a model.safetensors.index.json file from a list of safetensor files. - - Args: - - Raises: """ if os.path.exists(self.safetensor_index_path): backup_index_path = f"{self.model_path}/model.safetensors.index.json.backup" @@ -179,6 +181,13 @@ def create_safetensor_index(self): print("No safetensor files provided.") def shard_safetensor_index(self, weight_map: Optional[dict] = None): + """ + Modify the weight_map of the safetensors index json to only + get weights for the working layers + + Args: + weight_map(dict, Optional): holds which weight maps to which layer + """ if weight_map is None: weight_map = self.metadata["weight_map"] From 72fcf9bb7ebe7b78972a369a9312113638cf1a38 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 21 Oct 2024 13:15:32 -0800 Subject: [PATCH 458/589] starting write of llama3 model outside of transformers and using pytorch --- exo/inference/torch/model/llama3_tokenizer.py | 219 ++++++++++++++++++ exo/inference/torch/model/llm_utils.py | 23 ++ 2 files changed, 242 insertions(+) create mode 100644 exo/inference/torch/model/llama3_tokenizer.py create mode 100644 exo/inference/torch/model/llm_utils.py diff --git a/exo/inference/torch/model/llama3_tokenizer.py b/exo/inference/torch/model/llama3_tokenizer.py new file mode 100644 index 000000000..e595d4b33 --- /dev/null +++ b/exo/inference/torch/model/llama3_tokenizer.py @@ -0,0 +1,219 @@ +""" +Llama3 tokenizer from https://github.com/meta-llama/llama-models/blob/main/models/llama3/api/tokenizer.py +""" +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# top-level folder for each specific model found within the models/ directory at +# the top-level of this source tree. + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. + +import os +from logging import getLogger +from pathlib import Path +from typing import ( + AbstractSet, + cast, + Collection, + Dict, + Iterator, + List, + Literal, + Optional, + Sequence, + Union, +) + +import tiktoken + +from tiktoken.load import load_tiktoken_bpe + +logger = getLogger(__name__) + + +# The tiktoken tokenizer can handle <=400k chars without +# pyo3_runtime.PanicException. +TIKTOKEN_MAX_ENCODE_CHARS = 400_000 + +# https://github.com/openai/tiktoken/issues/195 +# Here we iterate over subsequences and split if we exceed the limit +# of max consecutive non-whitespace or whitespace characters. +MAX_NO_WHITESPACES_CHARS = 25_000 + + +_INSTANCE = None + + +class Tokenizer: + """ + Tokenizing and encoding/decoding text using the Tiktoken tokenizer. + """ + + special_tokens: Dict[str, int] + + num_reserved_special_tokens = 256 + + pat_str = r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+" # noqa: E501 + + @classmethod + def get_instance(cls): + global _INSTANCE + + if _INSTANCE is None: + _INSTANCE = Tokenizer( + os.path.join(os.path.dirname(__file__), "tokenizer.model") + ) + return _INSTANCE + + def __init__(self, model_path: str): + """ + Initializes the Tokenizer with a Tiktoken model. 
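# Illustration only -- minimal usage of the Tokenizer defined in this file,
# assuming a Llama 3 tiktoken BPE file ("tokenizer.model") has been downloaded;
# the path below is hypothetical:
#
#   tok = Tokenizer(model_path="/models/llama3/original/tokenizer.model")
#   ids = tok.encode("hello exo", bos=True, eos=False)  # ids[0] == tok.bos_id
#   text = tok.decode(ids)
#
# Special-token ids are assigned as num_base_tokens + index into the
# special_tokens list, so they occupy the top of the vocabulary.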
+ + Args: + model_path (str): The path to the Tiktoken model file. + """ + assert os.path.isfile(model_path), model_path + + mergeable_ranks = load_tiktoken_bpe(model_path) + num_base_tokens = len(mergeable_ranks) + special_tokens = [ + "<|begin_of_text|>", + "<|end_of_text|>", + "<|reserved_special_token_0|>", + "<|reserved_special_token_1|>", + "<|finetune_right_pad_id|>", + "<|step_id|>", + "<|start_header_id|>", + "<|end_header_id|>", + "<|eom_id|>", # end of message + "<|eot_id|>", # end of turn + "<|python_tag|>", + "<|image|>", + ] + reserved_tokens = [ + f"<|reserved_special_token_{2 + i}|>" + for i in range(self.num_reserved_special_tokens - len(special_tokens)) + ] + special_tokens = special_tokens + reserved_tokens + + self.special_tokens = { + token: num_base_tokens + i for i, token in enumerate(special_tokens) + } + self.model = tiktoken.Encoding( + name=Path(model_path).name, + pat_str=self.pat_str, + mergeable_ranks=mergeable_ranks, + special_tokens=self.special_tokens, + ) + + self.n_words: int = num_base_tokens + len(special_tokens) + # BOS / EOS token IDs + self.bos_id: int = self.special_tokens["<|begin_of_text|>"] + self.eos_id: int = self.special_tokens["<|end_of_text|>"] + self.eot_id: int = self.special_tokens["<|eot_id|>"] + self.eom_id: int = self.special_tokens["<|eom_id|>"] + self.python_tag_id = self.special_tokens["<|python_tag|>"] + self.pad_id: int = self.special_tokens["<|finetune_right_pad_id|>"] + self.stop_tokens = [ + self.eos_id, + self.special_tokens["<|eom_id|>"], + self.special_tokens["<|eot_id|>"], + ] + + def encode( + self, + s: str, + *, + bos: bool, + eos: bool, + allowed_special: Optional[Union[Literal["all"], AbstractSet[str]]] = None, + disallowed_special: Union[Literal["all"], Collection[str]] = (), + ) -> List[int]: + """ + Encodes a string into a list of token IDs. + + Args: + s (str): The input string to be encoded. + bos (bool): Whether to prepend the beginning-of-sequence token. + eos (bool): Whether to append the end-of-sequence token. + allowed_special ("all"|set[str]): allowed special tokens in string + disallowed_special ("all"|set[str]): special tokens that raise an error when in string + + Returns: + list[int]: A list of token IDs. + + By default, setting disallowed_special=() encodes a string by ignoring + special tokens. Specifically: + - Setting `disallowed_special` to () will cause all text corresponding + to special tokens to be encoded as natural text (insteading of raising + an error). + - Setting `allowed_special` to "all" will treat all text corresponding + to special tokens to be encoded as special tokens. + """ + if allowed_special is None: + allowed_special = set() + assert type(s) is str + + substrs = ( + substr + for i in range(0, len(s), TIKTOKEN_MAX_ENCODE_CHARS) + for substr in self._split_whitespaces_or_nonwhitespaces( + s[i : i + TIKTOKEN_MAX_ENCODE_CHARS], MAX_NO_WHITESPACES_CHARS + ) + ) + t: List[int] = [] + for substr in substrs: + t.extend( + self.model.encode( + substr, + allowed_special=allowed_special, + disallowed_special=disallowed_special, + ) + ) + if bos: + t.insert(0, self.bos_id) + if eos: + t.append(self.eos_id) + return t + + def decode(self, t: Sequence[int]) -> str: + """ + Decodes a list of token IDs into a string. + + Args: + t (List[int]): The list of token IDs to be decoded. + + Returns: + str: The decoded string. + """ + # Typecast is safe here. Tiktoken doesn't do anything list-related with the sequence. 
+ return self.model.decode(cast(List[int], t)) + + @staticmethod + def _split_whitespaces_or_nonwhitespaces( + s: str, max_consecutive_slice_len: int + ) -> Iterator[str]: + """ + Splits the string `s` so that each substring contains no more than `max_consecutive_slice_len` + consecutive whitespaces or consecutive non-whitespaces. + """ + current_slice_len = 0 + current_slice_is_space = s[0].isspace() if len(s) > 0 else False + slice_start = 0 + + for i in range(len(s)): + is_now_space = s[i].isspace() + + if current_slice_is_space ^ is_now_space: + current_slice_len = 1 + current_slice_is_space = is_now_space + else: + current_slice_len += 1 + if current_slice_len > max_consecutive_slice_len: + yield s[slice_start:i] + slice_start = i + current_slice_len = 1 + yield s[slice_start:] diff --git a/exo/inference/torch/model/llm_utils.py b/exo/inference/torch/model/llm_utils.py new file mode 100644 index 000000000..0868207d5 --- /dev/null +++ b/exo/inference/torch/model/llm_utils.py @@ -0,0 +1,23 @@ +""" +Utility methods used by LLMs +""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchtune.modules import RotaryPositionalEmbeddings + +def rope_embed( + self, + input_embeddings: torch.Tensor, + position_ids: torch.Tensor, +): + """ + Wrapper of rotary embeddings using pytorch module + + Args: + input_embeddings (torch.Tensor): token embeddings from input + position_ids (torch.Tensor): position ids of tokens + """ + rotary_emb = RotaryPositionalEmbeddings() + + From 9cac5ab706af117de2255a0bf515a1cf84a573f7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 21 Oct 2024 13:38:04 -0800 Subject: [PATCH 459/589] moving llama3 modeling source code, updating readme file --- exo/inference/torch/README.md | 30 +++++++++++-------- exo/inference/torch/model/llama3/model.py | 5 ++++ .../tokenizer.py} | 0 exo/inference/torch/model/llm_utils.py | 24 +++++++++++---- 4 files changed, 42 insertions(+), 17 deletions(-) create mode 100644 exo/inference/torch/model/llama3/model.py rename exo/inference/torch/model/{llama3_tokenizer.py => llama3/tokenizer.py} (100%) diff --git a/exo/inference/torch/README.md b/exo/inference/torch/README.md index 1beca27cc..da67faa28 100644 --- a/exo/inference/torch/README.md +++ b/exo/inference/torch/README.md @@ -1,4 +1,21 @@ -# PyTorch & HuggingFace inference engine +# PyTorch inference engine + +## Devs +- [Vincent Castro](https://github.com/risingsunomi) + +## Notes/Issues +### 10/10/2024 +- To select a pytorch device via environment variables, set the variable TORCH_DEVICE + - XLA is currently not installed and will need to be added to inference.py, looking into doing this on a TPU VM + - With pytorch, CUDA and ROCm are the same so specifying CUDA also enables ROCm support. See this [post](https://github.com/pytorch/pytorch/issues/55223#issuecomment-812587373) + - Looking into adding mobile device support properly +- If device is not CPU the data type defaults to float32 else float16. + +### 10/13/2024 +Still working on split model development (see test_split_model.py). Right now, it seems to do it but still transformers is loading more in the RAM and GPU as it loads up a larger models (causing an OOM). Will research and add to next update. Right now, tests are added and are in development. + +### 10/21/2024 +Working on removing transformers due to inference and VRAM usage [issues](https://github.com/exo-explore/exo/pull/139#issuecomment-2424953962). Creating a pure pytorch implementation of llama3 as using transformers wont work for exo. 
Using some code from meta but also implementing the use of torchtune. ## Tech @@ -31,14 +48,3 @@ GPU 4: NVIDIA Quadro P400 2GB GPU 5: NVIDIA Quadro P400 2GB ``` - -## Notes/Issues -### 10/10/2024 -- To select a pytorch device via environment variables, set the variable TORCH_DEVICE - - XLA is currently not installed and will need to be added to inference.py, looking into doing this on a TPU VM - - With pytorch, CUDA and ROCm are the same so specifying CUDA also enables ROCm support. See this [post](https://github.com/pytorch/pytorch/issues/55223#issuecomment-812587373) - - Looking into adding mobile device support properly -- If device is not CPU the data type defaults to float32 else float16. - -### 10/13/2024 -Still working on split model development (see test_split_model.py). Right now, it seems to do it but still transformers is loading more in the RAM and GPU as it loads up a larger models (causing an OOM). Will research and add to next update. Right now, tests are added and are in development. diff --git a/exo/inference/torch/model/llama3/model.py b/exo/inference/torch/model/llama3/model.py new file mode 100644 index 000000000..d26c0c02f --- /dev/null +++ b/exo/inference/torch/model/llama3/model.py @@ -0,0 +1,5 @@ +""" +llama3 model + +Written with pytorch using torchtune and other methods +""" diff --git a/exo/inference/torch/model/llama3_tokenizer.py b/exo/inference/torch/model/llama3/tokenizer.py similarity index 100% rename from exo/inference/torch/model/llama3_tokenizer.py rename to exo/inference/torch/model/llama3/tokenizer.py diff --git a/exo/inference/torch/model/llm_utils.py b/exo/inference/torch/model/llm_utils.py index 0868207d5..2b95e5479 100644 --- a/exo/inference/torch/model/llm_utils.py +++ b/exo/inference/torch/model/llm_utils.py @@ -6,18 +6,32 @@ import torch.nn.functional as F from torchtune.modules import RotaryPositionalEmbeddings +from typing import Optional + def rope_embed( - self, + head_dim: int, input_embeddings: torch.Tensor, - position_ids: torch.Tensor, -): + position_ids: Optional[torch.Tensor], +) -> torch.Tensor: """ Wrapper of rotary embeddings using pytorch module Args: - input_embeddings (torch.Tensor): token embeddings from input + input_embeddings (torch.Tensor): token embeddings from input position_ids (torch.Tensor): position ids of tokens + + Returns: + torch.Tensor: output with RoPE applied """ - rotary_emb = RotaryPositionalEmbeddings() + try: + rotary_emb = RotaryPositionalEmbeddings(head_dim) + output = rotary_emb.forward( + input_embeddings, + input_pos=position_ids + ) + except Exception: + raise + + return output From 80120084722384ec1fe8318c73dea5d7a13fe0bb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 22 Oct 2024 17:32:17 -0800 Subject: [PATCH 460/589] adding pytorch based llama model, added testing and working through bugs --- exo/inference/torch/inference.py | 2 +- exo/inference/torch/model/llama3/model.py | 5 - exo/inference/torch/model/llama3/tokenizer.py | 219 -------------- exo/inference/torch/model/llm_utils.py | 37 --- .../torch/{model => models}/__init__.py | 0 exo/inference/torch/{model => models}/hf.py | 0 .../{model => models}/hf_safe_tensor_shard.py | 0 exo/inference/torch/models/llama3.py | 275 ++++++++++++++++++ exo/inference/torch/models/llm_utils.py | 92 ++++++ .../torch/tests/test_llama3_model.py | 129 ++++++++ .../torch/tests/test_safetensor_shard.py | 2 +- 11 files changed, 498 insertions(+), 263 deletions(-) delete mode 100644 exo/inference/torch/model/llama3/model.py delete mode 100644 
exo/inference/torch/model/llama3/tokenizer.py delete mode 100644 exo/inference/torch/model/llm_utils.py rename exo/inference/torch/{model => models}/__init__.py (100%) rename exo/inference/torch/{model => models}/hf.py (100%) rename exo/inference/torch/{model => models}/hf_safe_tensor_shard.py (100%) create mode 100644 exo/inference/torch/models/llama3.py create mode 100644 exo/inference/torch/models/llm_utils.py create mode 100644 exo/inference/torch/tests/test_llama3_model.py diff --git a/exo/inference/torch/inference.py b/exo/inference/torch/inference.py index f89f2367f..23bbe814a 100644 --- a/exo/inference/torch/inference.py +++ b/exo/inference/torch/inference.py @@ -12,7 +12,7 @@ from typing import Optional, Tuple, Union, List from exo.inference.shard import Shard from exo.inference.inference_engine import InferenceEngine -from exo.inference.torch.model.hf import ShardedHuggingFaceModel +from exo.inference.torch.models.hf import ShardedHuggingFaceModel from exo.inference.tokenizers import resolve_tokenizer from exo.helpers import DEBUG from exo.download.hf.hf_shard_download import HFShardDownloader diff --git a/exo/inference/torch/model/llama3/model.py b/exo/inference/torch/model/llama3/model.py deleted file mode 100644 index d26c0c02f..000000000 --- a/exo/inference/torch/model/llama3/model.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -llama3 model - -Written with pytorch using torchtune and other methods -""" diff --git a/exo/inference/torch/model/llama3/tokenizer.py b/exo/inference/torch/model/llama3/tokenizer.py deleted file mode 100644 index e595d4b33..000000000 --- a/exo/inference/torch/model/llama3/tokenizer.py +++ /dev/null @@ -1,219 +0,0 @@ -""" -Llama3 tokenizer from https://github.com/meta-llama/llama-models/blob/main/models/llama3/api/tokenizer.py -""" -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# top-level folder for each specific model found within the models/ directory at -# the top-level of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. - -import os -from logging import getLogger -from pathlib import Path -from typing import ( - AbstractSet, - cast, - Collection, - Dict, - Iterator, - List, - Literal, - Optional, - Sequence, - Union, -) - -import tiktoken - -from tiktoken.load import load_tiktoken_bpe - -logger = getLogger(__name__) - - -# The tiktoken tokenizer can handle <=400k chars without -# pyo3_runtime.PanicException. -TIKTOKEN_MAX_ENCODE_CHARS = 400_000 - -# https://github.com/openai/tiktoken/issues/195 -# Here we iterate over subsequences and split if we exceed the limit -# of max consecutive non-whitespace or whitespace characters. -MAX_NO_WHITESPACES_CHARS = 25_000 - - -_INSTANCE = None - - -class Tokenizer: - """ - Tokenizing and encoding/decoding text using the Tiktoken tokenizer. - """ - - special_tokens: Dict[str, int] - - num_reserved_special_tokens = 256 - - pat_str = r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+" # noqa: E501 - - @classmethod - def get_instance(cls): - global _INSTANCE - - if _INSTANCE is None: - _INSTANCE = Tokenizer( - os.path.join(os.path.dirname(__file__), "tokenizer.model") - ) - return _INSTANCE - - def __init__(self, model_path: str): - """ - Initializes the Tokenizer with a Tiktoken model. 
- - Args: - model_path (str): The path to the Tiktoken model file. - """ - assert os.path.isfile(model_path), model_path - - mergeable_ranks = load_tiktoken_bpe(model_path) - num_base_tokens = len(mergeable_ranks) - special_tokens = [ - "<|begin_of_text|>", - "<|end_of_text|>", - "<|reserved_special_token_0|>", - "<|reserved_special_token_1|>", - "<|finetune_right_pad_id|>", - "<|step_id|>", - "<|start_header_id|>", - "<|end_header_id|>", - "<|eom_id|>", # end of message - "<|eot_id|>", # end of turn - "<|python_tag|>", - "<|image|>", - ] - reserved_tokens = [ - f"<|reserved_special_token_{2 + i}|>" - for i in range(self.num_reserved_special_tokens - len(special_tokens)) - ] - special_tokens = special_tokens + reserved_tokens - - self.special_tokens = { - token: num_base_tokens + i for i, token in enumerate(special_tokens) - } - self.model = tiktoken.Encoding( - name=Path(model_path).name, - pat_str=self.pat_str, - mergeable_ranks=mergeable_ranks, - special_tokens=self.special_tokens, - ) - - self.n_words: int = num_base_tokens + len(special_tokens) - # BOS / EOS token IDs - self.bos_id: int = self.special_tokens["<|begin_of_text|>"] - self.eos_id: int = self.special_tokens["<|end_of_text|>"] - self.eot_id: int = self.special_tokens["<|eot_id|>"] - self.eom_id: int = self.special_tokens["<|eom_id|>"] - self.python_tag_id = self.special_tokens["<|python_tag|>"] - self.pad_id: int = self.special_tokens["<|finetune_right_pad_id|>"] - self.stop_tokens = [ - self.eos_id, - self.special_tokens["<|eom_id|>"], - self.special_tokens["<|eot_id|>"], - ] - - def encode( - self, - s: str, - *, - bos: bool, - eos: bool, - allowed_special: Optional[Union[Literal["all"], AbstractSet[str]]] = None, - disallowed_special: Union[Literal["all"], Collection[str]] = (), - ) -> List[int]: - """ - Encodes a string into a list of token IDs. - - Args: - s (str): The input string to be encoded. - bos (bool): Whether to prepend the beginning-of-sequence token. - eos (bool): Whether to append the end-of-sequence token. - allowed_special ("all"|set[str]): allowed special tokens in string - disallowed_special ("all"|set[str]): special tokens that raise an error when in string - - Returns: - list[int]: A list of token IDs. - - By default, setting disallowed_special=() encodes a string by ignoring - special tokens. Specifically: - - Setting `disallowed_special` to () will cause all text corresponding - to special tokens to be encoded as natural text (insteading of raising - an error). - - Setting `allowed_special` to "all" will treat all text corresponding - to special tokens to be encoded as special tokens. - """ - if allowed_special is None: - allowed_special = set() - assert type(s) is str - - substrs = ( - substr - for i in range(0, len(s), TIKTOKEN_MAX_ENCODE_CHARS) - for substr in self._split_whitespaces_or_nonwhitespaces( - s[i : i + TIKTOKEN_MAX_ENCODE_CHARS], MAX_NO_WHITESPACES_CHARS - ) - ) - t: List[int] = [] - for substr in substrs: - t.extend( - self.model.encode( - substr, - allowed_special=allowed_special, - disallowed_special=disallowed_special, - ) - ) - if bos: - t.insert(0, self.bos_id) - if eos: - t.append(self.eos_id) - return t - - def decode(self, t: Sequence[int]) -> str: - """ - Decodes a list of token IDs into a string. - - Args: - t (List[int]): The list of token IDs to be decoded. - - Returns: - str: The decoded string. - """ - # Typecast is safe here. Tiktoken doesn't do anything list-related with the sequence. 
- return self.model.decode(cast(List[int], t)) - - @staticmethod - def _split_whitespaces_or_nonwhitespaces( - s: str, max_consecutive_slice_len: int - ) -> Iterator[str]: - """ - Splits the string `s` so that each substring contains no more than `max_consecutive_slice_len` - consecutive whitespaces or consecutive non-whitespaces. - """ - current_slice_len = 0 - current_slice_is_space = s[0].isspace() if len(s) > 0 else False - slice_start = 0 - - for i in range(len(s)): - is_now_space = s[i].isspace() - - if current_slice_is_space ^ is_now_space: - current_slice_len = 1 - current_slice_is_space = is_now_space - else: - current_slice_len += 1 - if current_slice_len > max_consecutive_slice_len: - yield s[slice_start:i] - slice_start = i - current_slice_len = 1 - yield s[slice_start:] diff --git a/exo/inference/torch/model/llm_utils.py b/exo/inference/torch/model/llm_utils.py deleted file mode 100644 index 2b95e5479..000000000 --- a/exo/inference/torch/model/llm_utils.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Utility methods used by LLMs -""" -import torch -import torch.nn as nn -import torch.nn.functional as F -from torchtune.modules import RotaryPositionalEmbeddings - -from typing import Optional - -def rope_embed( - head_dim: int, - input_embeddings: torch.Tensor, - position_ids: Optional[torch.Tensor], -) -> torch.Tensor: - """ - Wrapper of rotary embeddings using pytorch module - - Args: - input_embeddings (torch.Tensor): token embeddings from input - position_ids (torch.Tensor): position ids of tokens - - Returns: - torch.Tensor: output with RoPE applied - """ - try: - rotary_emb = RotaryPositionalEmbeddings(head_dim) - output = rotary_emb.forward( - input_embeddings, - input_pos=position_ids - ) - except Exception: - raise - - return output - - diff --git a/exo/inference/torch/model/__init__.py b/exo/inference/torch/models/__init__.py similarity index 100% rename from exo/inference/torch/model/__init__.py rename to exo/inference/torch/models/__init__.py diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/models/hf.py similarity index 100% rename from exo/inference/torch/model/hf.py rename to exo/inference/torch/models/hf.py diff --git a/exo/inference/torch/model/hf_safe_tensor_shard.py b/exo/inference/torch/models/hf_safe_tensor_shard.py similarity index 100% rename from exo/inference/torch/model/hf_safe_tensor_shard.py rename to exo/inference/torch/models/hf_safe_tensor_shard.py diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py new file mode 100644 index 000000000..c0af9ea3f --- /dev/null +++ b/exo/inference/torch/models/llama3.py @@ -0,0 +1,275 @@ +""" +llama3 model + +Written with pytorch using torchtune and other methods +""" +from typing import Optional, Tuple + +import torch +import torch.nn as nn +from torchtune.modules import MultiHeadAttention, RotaryPositionalEmbeddings, KVCache + +class LlamaBlock(nn.Module): + """ + Encoder block class for the LLaMA model without residual connections. 
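+  (Editorial note, not part of the original patch: despite the summary above,
+  the forward pass below does add residual connections around both the
+  attention and feed-forward sub-layers.)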
+ """ + def __init__( + self, + dim, + heads, + num_kv_heads, + head_dim, + ff_dim, + rms_norm_eps, + attention_dropout=0.0, + use_bias=False, + max_seq_len=4096, + pos_embeddings=None + ): + super(LlamaBlock, self).__init__() + + # Define linear projections for Q, K, V, and Output + self.q_proj = nn.Linear(dim, heads * head_dim, bias=use_bias) + self.k_proj = nn.Linear(dim, num_kv_heads * head_dim, bias=use_bias) + self.v_proj = nn.Linear(dim, num_kv_heads * head_dim, bias=use_bias) + self.output_proj = nn.Linear(heads * head_dim, dim, bias=use_bias) + + # Define optional query normalization + self.q_norm = nn.LayerNorm(head_dim, eps=rms_norm_eps) + + # MultiHeadAttention from torchtune + self.attn = MultiHeadAttention( + embed_dim=dim, + num_heads=heads, + num_kv_heads=num_kv_heads, + head_dim=head_dim, + q_proj=self.q_proj, + k_proj=self.k_proj, + v_proj=self.v_proj, + output_proj=self.output_proj, + pos_embeddings=pos_embeddings, + q_norm=self.q_norm, + k_norm=self.q_norm, + kv_cache=None, + max_seq_len=max_seq_len, + is_causal=True, + attn_dropout=attention_dropout + ) + + # RMSNorm layers before and after attention and feed-forward layers + self.norm1 = nn.LayerNorm(dim, eps=rms_norm_eps) + self.norm2 = nn.LayerNorm(dim, eps=rms_norm_eps) + + # Feed-forward layer with SwiGLU activation + self.feed_forward = nn.Sequential( + nn.Linear(dim, ff_dim), + nn.GLU(), # SwiGLU approximation + nn.Linear(ff_dim // 2, dim) + ) + + def forward( + self, + x, + kv_cache: Optional[KVCache] = None, + attention_mask: Optional[torch.Tensor] = None, + input_pos: Optional[torch.Tensor] = None + ) -> Tuple[torch.Tensor, KVCache]: + """ + Forward pass with integrated attention and key-value caching. + + Args: + x (torch.Tensor): Input tensor of shape (batch_size, seq_len, dim). + kv_cache (Optional[KVCache]): KVCache object for managing past key-value states. + attention_mask (Optional[torch.Tensor]): Attention mask of shape (batch_size, 1, 1, seq_len). + input_pos (Optional[torch.Tensor]): Position IDs tensor of shape (batch_size, seq_len). + + Returns: + Tuple[torch.Tensor, KVCache]: + - x (torch.Tensor): Output tensor of shape (batch_size, seq_len, dim). + - kv_cache (KVCache): Updated KVCache object. + """ + # Apply normalization before attention + residual = x + x = self.norm1(x) + + # Compute Q, K, V projections + q = self.q_proj(x) + k = self.k_proj(x) + v = self.v_proj(x) + + # Initialize or update KVCache + if kv_cache is None: + kv_cache = KVCache( + batch_size=x.size(0), + max_seq_len=x.size(1), + num_heads=self.attn.num_heads, + head_dim=self.attn.head_dim, + dtype=x.dtype + ) + + # Update KVCache with new key-value pairs + k_val, v_val = kv_cache.update(k, v) + + # Apply MultiHeadAttention with key-value caching + x = self.attn(q, k_val, v_val, mask=attention_mask, input_pos=input_pos) + + # Residual connection + x = x + residual + + # Apply feed-forward network with residual connection + residual = x + x = self.norm2(x) + x = self.feed_forward(x) + x = x + residual + + return x, kv_cache + +class LlamaModel(nn.Module): + """ + LlamaModel is a pure PyTorch implementation of the LLaMA architecture + """ + + def __init__(self, config, tokenizer): + """ + Initialize the LlamaModel. + + Args: + config (dict): Configuration dictionary containing model parameters. + - hidden_size (int): Size of the hidden layers. + - num_hidden_layers (int): Number of transformer layers. + - num_attention_heads (int): Number of attention heads. 
+ - intermediate_size (int): Size of the intermediate (feed-forward) layers. + - vocab_size (int): Vocabulary size for the embedding layer. + - max_position_embeddings (int): Maximum number of positional embeddings. + - rms_norm_eps (float): Epsilon for RMS normalization. + - head_dim (int): Dimension of each attention head. + - attention_dropout (float): Dropout rate for attention layers. + tokenizer: Tokenizer used for input preprocessing. + """ + super(LlamaModel, self).__init__() + + # Load configurations from config + self.config = config + self.hidden_size = config['hidden_size'] + self.num_layers = config['num_hidden_layers'] + self.num_heads = config['num_attention_heads'] + self.num_kv_heads = config['num_key_value_heads'] + self.intermediate_size = config['intermediate_size'] + self.vocab_size = config['vocab_size'] + self.max_position_embeddings = config['max_position_embeddings'] + self.rms_norm_eps = config['rms_norm_eps'] + self.head_dim = config['head_dim'] + self.attention_dropout = config.get('attention_dropout', 0.0) + + # Model layers + self.embed = nn.Embedding(self.vocab_size, self.hidden_size) + self.rotary_pos_emb = RotaryPositionalEmbeddings( + self.hidden_size // self.num_heads, + config['rope_scaling']['original_max_position_embeddings'], + config['rope_theta'] + ) + self.layers = nn.ModuleList([ + LlamaBlock( + dim=self.hidden_size, + heads=self.num_heads, + num_kv_heads=self.num_kv_heads, + head_dim=self.head_dim, + ff_dim=self.intermediate_size, + rms_norm_eps=self.rms_norm_eps, + attention_dropout=self.attention_dropout, + use_bias=config.get('attention_bias', False) + ) for _ in range(self.num_layers) + ]) + self.norm = nn.LayerNorm(self.hidden_size, eps=self.rms_norm_eps) + self.to_logits = nn.Linear(self.hidden_size, self.vocab_size) + + def forward( + self, + input_ids: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + pos_ids: Optional[torch.Tensor] = None, + past_kv_cache: Optional[KVCache] = None, + return_hidden_states: bool = False + ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor]], KVCache]: + """ + Forward pass with integrated position ID handling, attention mask, and optional KVCache. + + Args: + input_ids (torch.Tensor): Input token IDs of shape (batch_size, seq_len). + attention_mask (Optional[torch.Tensor]): Attention mask of shape (batch_size, seq_len). + pos_ids (Optional[torch.Tensor]): Position IDs. If None, they are calculated automatically. + past_kv_cache (Optional[KVCache]): Optional KVCache for efficient generation. + If provided, it stores past key-value states for faster autoregressive inference. + return_hidden_states (bool): Whether to return hidden states from each layer. + + Returns: + Tuple[torch.Tensor, Optional[Tuple[torch.Tensor]], KVCache]: + - logits (torch.Tensor): Output logits of shape (batch_size, seq_len, vocab_size). + - hidden_states (Optional[Tuple[torch.Tensor]]): Hidden states from each layer, if return_hidden_states is True. + - past_kv_cache (KVCache): Updated KVCache object. 
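+
+    Example (editorial sketch, not part of the original patch; assumes
+    `model`, `input_ids` and `mask` have already been constructed):
+      >>> logits, hidden, cache = model(input_ids, attention_mask=mask, return_hidden_states=True)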
+ """ + batch_size, seq_len = input_ids.shape + + # Create initial embeddings + x = self.embed(input_ids) + + # Initialize position IDs if not provided + if pos_ids is None: + past_seen_tokens = past_kv_cache.size if past_kv_cache is not None else 0 + pos_ids = torch.arange( + past_seen_tokens, + past_seen_tokens + seq_len, + device=input_ids.device + ).unsqueeze(0).expand(batch_size, -1) + + # Reshape x to prepare for rotary embeddings: (batch_size, seq_len, num_heads, head_dim) + x = x.view(batch_size, seq_len, self.num_heads, self.head_dim) + + # Apply rotary positional embeddings + x = self.rotary_pos_emb( + x=x, + input_pos=pos_ids + ) + + # Reshape x back to original shape: (batch_size, seq_len, hidden_size) + x = x.view(batch_size, seq_len, self.hidden_size) + + # Initialize or use the provided KVCache + if past_kv_cache is None: + past_kv_cache = KVCache( + batch_size=batch_size, + max_seq_len=self.max_position_embeddings, + num_heads=self.num_heads, + head_dim=self.head_dim, + dtype=x.dtype + ) + + # Apply attention mask if provided (convert to appropriate format) + if attention_mask is not None: + attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) # Shape: (batch_size, 1, 1, seq_len) + attention_mask = (1.0 - attention_mask) * -1e4 # Convert to large negative values + + # Track hidden states if required + hidden_states = [] + + # Forward pass through layers with KVCache + for layer_idx, layer in enumerate(self.layers): + x, k_val, v_val = layer(x, past_kv_cache, layer_idx, attention_mask) + + # Update KVCache + past_kv_cache.update(k_val, v_val) + + if return_hidden_states: + hidden_states.append(x) + + # Apply final layer normalization + x = self.norm(x) + + # Compute logits + logits = self.to_logits(x) + + # Prepare the return values + if return_hidden_states: + return logits, tuple(hidden_states), past_kv_cache + else: + return logits, None, past_kv_cache diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py new file mode 100644 index 000000000..89057021d --- /dev/null +++ b/exo/inference/torch/models/llm_utils.py @@ -0,0 +1,92 @@ +""" +Utility methods used by LLMs +""" +import json +from pathlib import Path + +import torch +import torch.nn.functional as F + +from exo.helpers import DEBUG + +def load_model_config(model_config_path: Path) -> dict: + """ + Loads the config.json of the model + + Args: + model_path (Path): local path to model config json + + Returns: + dict: The config as a dictionary + """ + model_config = {} + with open(model_config_path, "r") as f: + model_config = json.load(f) + return model_config + +def select_next_token( + logits, + top_k=0, + top_p=0.0, + temperature=1.0, + use_max=False, +): + """ + Selects the next token from logits using top-k, top-p, and temperature scaling. + + Args: + logits (torch.Tensor): Logits tensor of shape (batch_size, vocab_size). + top_k (int): Number of top logits to consider for sampling. + top_p (float): Cumulative probability threshold for nucleus sampling. + temperature (float): Scaling factor for temperature. + use_max (bool): Whether to use argmax for next token selection. + debug (bool): If True, prints debugging information. + + Returns: + next_token (torch.Tensor): The next token selected (batch_size,). 
+ """ + # Get logits for the last token in the sequence + logits = logits[:, -1, :].clone().float() + + # Apply temperature scaling + if temperature != 1.0: + logits = logits / temperature + + # Apply top-k filtering + if top_k > 0: + # Get the top-k logits and set the rest to -inf + top_k_values, _ = torch.topk(logits, top_k, dim=-1) + min_top_k_value = top_k_values[:, -1, None] + logits = torch.where(logits < min_top_k_value, torch.tensor(float('-inf'), device=logits.device), logits) + + # Apply top-p (nucleus) filtering + if top_p > 0.0: + sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) + cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) + + # Mask tokens exceeding the top-p threshold + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[:, 1:] = sorted_indices_to_remove[:, :-1].clone() # Shift right + sorted_indices_to_remove[:, 0] = 0 # Ensure at least one token is selected + + indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove) + logits = logits.masked_fill(indices_to_remove, float('-inf')) + + # Calculate probabilities + probs = F.softmax(logits, dim=-1) + + # Select next token + if not use_max: + next_token = torch.multinomial(probs, num_samples=1) + else: + next_token = torch.argmax(logits, dim=-1, keepdim=True) + + # Debugging output + if DEBUG >= 4: + print(f"Logits: {logits}") + print(f"Probabilities: {probs}") + print(f"Next token: {next_token}") + + return next_token.squeeze(-1) + + diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py new file mode 100644 index 000000000..25485b11b --- /dev/null +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -0,0 +1,129 @@ +""" +Test of pytorch based llama3 model +""" +from pathlib import Path + +import torch +from transformers import AutoTokenizer +from huggingface_hub import snapshot_download +from safetensors.torch import load_file as load_safetensors +from exo.inference.torch.models.llm_utils import load_model_config, select_next_token +from exo.inference.torch.models.llama3 import LlamaModel, KVCache + +# Constants +MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct" + +# Get the path to the model files from the Hugging Face cache +cache_dir = Path(snapshot_download(MODEL_NAME)) +print(f"Cache directory: {cache_dir}") + +# Load model configuration +config = load_model_config(cache_dir / "config.json") + +# Initialize tokenizer +tokenizer = AutoTokenizer.from_pretrained(cache_dir) + +# Initialize LlamaModel with config and tokenizer +model = LlamaModel(config, tokenizer) + +# Load weights from safetensors files in the cache directory +safetensors_files = list(cache_dir.glob("*.safetensors")) +if not safetensors_files: + raise FileNotFoundError("No safetensors files found in the cache directory.") + +# Load weights from each found safetensors file +for safetensor_file in safetensors_files: + print(f"Loading weights from: {safetensor_file}") + state_dict = load_safetensors(safetensor_file) + model.load_state_dict(state_dict, strict=False) + +model.eval() # Set the model to evaluation mode + +# Sample text for testing +test_text = "Once upon a time," + +def test_forward_pass(model, tokenizer, text): + """ + Test the forward pass of the LlamaModel with given input text. 
+ """ + # Tokenize input text + inputs = tokenizer(text, return_tensors="pt") + input_ids = inputs["input_ids"] + attention_mask = inputs.get("attention_mask") + + # Initialize KVCache + past_kv_cache = KVCache( + batch_size=input_ids.size(0), + max_seq_len=model.max_position_embeddings, + num_heads=model.num_heads, + head_dim=model.head_dim, + dtype=input_ids.dtype + ) + + # Forward pass with KVCache + with torch.no_grad(): + logits, hidden_states, _ = model( + input_ids, + attention_mask=attention_mask, + pos_ids=None, + past_kv_cache=past_kv_cache, + return_hidden_states=True + ) + + # Print logits shape and hidden state information + print(f"Logits shape: {logits.shape}") + if hidden_states: + print(f"Number of hidden states: {len(hidden_states)}") + print(f"Shape of last hidden state: {hidden_states[-1].shape}") + +def test_generation(model, tokenizer, text, max_length=50): + """ + Test the generation capabilities of the LlamaModel with sample text. + """ + # Tokenize input text + inputs = tokenizer(text, return_tensors="pt") + input_ids = inputs["input_ids"] + attention_mask = inputs.get("attention_mask") + + # Initialize KVCache for caching + past_kv_cache = KVCache( + batch_size=input_ids.size(0), + max_seq_len=model.max_position_embeddings, + num_heads=model.num_heads, + head_dim=model.head_dim, + dtype=input_ids.dtype + ) + + # Start with initial input_ids + generated_ids = input_ids.clone() + + # Generate tokens step-by-step + for _ in range(max_length): + with torch.no_grad(): + logits, _, past_kv_cache = model( + generated_ids, + attention_mask=attention_mask, + past_kv_cache=past_kv_cache + ) + + # Select next token using logits + next_token = select_next_token(logits, top_k=50, top_p=0.9, temperature=0.7, use_max=False) + + # Update generated_ids + generated_ids = torch.cat([generated_ids, next_token.unsqueeze(0)], dim=1) + + # Check for EOS token + if next_token.item() == tokenizer.eos_token_id: + break + + # Decode generated text + generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True) + print(f"Generated text: {generated_text}") + +if __name__ == "__main__": + print("Testing forward pass:") + test_forward_pass(model, tokenizer, test_text) + + print("\nTesting generation:") + test_generation(model, tokenizer, test_text) + diff --git a/exo/inference/torch/tests/test_safetensor_shard.py b/exo/inference/torch/tests/test_safetensor_shard.py index 88c12ec01..ef72ef580 100644 --- a/exo/inference/torch/tests/test_safetensor_shard.py +++ b/exo/inference/torch/tests/test_safetensor_shard.py @@ -5,7 +5,7 @@ import asyncio from exo.inference.shard import Shard -from exo.inference.torch.model.hf_safe_tensor_shard import HFSafeTensorShard +from exo.inference.torch.models.hf_safe_tensor_shard import HFSafeTensorShard from exo.download.hf.hf_shard_download import HFShardDownloader from exo.download.hf.hf_helpers import get_weight_map From 76323d727dcc3fecbc18cded7c1b1d3cf339f79b Mon Sep 17 00:00:00 2001 From: Vincent C Date: Tue, 22 Oct 2024 17:35:35 -0800 Subject: [PATCH 461/589] Update llama3.py removing some of docstring --- exo/inference/torch/models/llama3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index c0af9ea3f..8156b1101 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -11,7 +11,7 @@ class LlamaBlock(nn.Module): """ - Encoder block class for the LLaMA model without residual connections. 
+ Encoder block class for the LLaMA model """ def __init__( self, From 0d66acdfa369a0d2986bb21cd8c35bf86532a587 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 23 Oct 2024 12:14:16 -0800 Subject: [PATCH 462/589] updating pytorch llama model still, currently broken but backing up as continuing the rewrite/refactor --- exo/inference/torch/models/llama3.py | 191 ++++++++++-------- .../torch/tests/test_safetensor_shard.py | 8 +- 2 files changed, 116 insertions(+), 83 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 8156b1101..23e04958e 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -3,11 +3,19 @@ Written with pytorch using torchtune and other methods """ +import math from typing import Optional, Tuple import torch import torch.nn as nn -from torchtune.modules import MultiHeadAttention, RotaryPositionalEmbeddings, KVCache +from torchtune.modules import ( + MultiHeadAttention, + RotaryPositionalEmbeddings, + KVCache, + RMSNorm +) + +from exo.inference.shard import Shard class LlamaBlock(nn.Module): """ @@ -21,12 +29,25 @@ def __init__( head_dim, ff_dim, rms_norm_eps, + rotary_pos_emb, attention_dropout=0.0, use_bias=False, max_seq_len=4096, pos_embeddings=None ): super(LlamaBlock, self).__init__() + # Class vars + self.dim = dim + self.heads = heads + self.num_kv_heads = num_kv_heads + self.head_dim = head_dim + self.ff_dim = ff_dim + self.rms_norm_eps = rms_norm_eps + self.attention_dropout = attention_dropout + self.use_bias = use_bias + self.max_seq_len = max_seq_len + self.pos_embeddings = pos_embeddings + self.rotary_pos_emb = rotary_pos_emb # Define linear projections for Q, K, V, and Output self.q_proj = nn.Linear(dim, heads * head_dim, bias=use_bias) @@ -68,12 +89,13 @@ def __init__( ) def forward( - self, - x, - kv_cache: Optional[KVCache] = None, - attention_mask: Optional[torch.Tensor] = None, - input_pos: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, KVCache]: + self, + hidden_states: torch.Tensor, + kv_cache: Optional[KVCache] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.Tensor] = None, + position_embeddings: Optional[torch.FloatTensor] = None + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: """ Forward pass with integrated attention and key-value caching. @@ -81,55 +103,60 @@ def forward( x (torch.Tensor): Input tensor of shape (batch_size, seq_len, dim). kv_cache (Optional[KVCache]): KVCache object for managing past key-value states. attention_mask (Optional[torch.Tensor]): Attention mask of shape (batch_size, 1, 1, seq_len). - input_pos (Optional[torch.Tensor]): Position IDs tensor of shape (batch_size, seq_len). + position_ids (Optional[torch.Tensor]): Position IDs tensor of shape (batch_size, seq_len). Returns: - Tuple[torch.Tensor, KVCache]: + Tuple[torch.Tensor, KVCache]: - x (torch.Tensor): Output tensor of shape (batch_size, seq_len, dim). - kv_cache (KVCache): Updated KVCache object. 
""" - # Apply normalization before attention - residual = x - x = self.norm1(x) + batch_size, seq_len, _ = hidden_states.shape + attn_output, attn_weights = None - # Compute Q, K, V projections - q = self.q_proj(x) - k = self.k_proj(x) - v = self.v_proj(x) + # Do kvq projection + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + # Reshape + query_states = query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) + key_states = key_states.view(batch_size, seq_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) + value_states = value_states.view(batch_size, seq_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) # Initialize or update KVCache if kv_cache is None: kv_cache = KVCache( - batch_size=x.size(0), - max_seq_len=x.size(1), - num_heads=self.attn.num_heads, + batch_size=batch_size, + max_seq_len=self.attn.max_seq_len, + num_heads=self.heads, head_dim=self.attn.head_dim, - dtype=x.dtype + dtype=hidden_states.dtype ) - # Update KVCache with new key-value pairs - k_val, v_val = kv_cache.update(k, v) + # cache + value_states = kv_cache.update(key_states, value_states) - # Apply MultiHeadAttention with key-value caching - x = self.attn(q, k_val, v_val, mask=attention_mask, input_pos=input_pos) + # Attention weights and causal mask + attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim) + + if attention_mask is not None: + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] + attn_weights = attn_weights + causal_mask - # Residual connection - x = x + residual + attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) + attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training) + attn_output = torch.matmul(attn_weights, value_states) - # Apply feed-forward network with residual connection - residual = x - x = self.norm2(x) - x = self.feed_forward(x) - x = x + residual + - return x, kv_cache + return attn_output, attn_weights, kv_cache class LlamaModel(nn.Module): """ LlamaModel is a pure PyTorch implementation of the LLaMA architecture """ - def __init__(self, config, tokenizer): + def __init__(self, config: dict, shard: Shard): """ Initialize the LlamaModel. @@ -144,10 +171,11 @@ def __init__(self, config, tokenizer): - rms_norm_eps (float): Epsilon for RMS normalization. - head_dim (int): Dimension of each attention head. - attention_dropout (float): Dropout rate for attention layers. - tokenizer: Tokenizer used for input preprocessing. 
""" super(LlamaModel, self).__init__() + self.shard = shard + # Load configurations from config self.config = config self.hidden_size = config['hidden_size'] @@ -160,9 +188,10 @@ def __init__(self, config, tokenizer): self.rms_norm_eps = config['rms_norm_eps'] self.head_dim = config['head_dim'] self.attention_dropout = config.get('attention_dropout', 0.0) + self.padding_idx = config["pad_token_id"] # Model layers - self.embed = nn.Embedding(self.vocab_size, self.hidden_size) + self.embed = nn.Embedding(self.vocab_size, self.hidden_size, self.padding_idx) self.rotary_pos_emb = RotaryPositionalEmbeddings( self.hidden_size // self.num_heads, config['rope_scaling']['original_max_position_embeddings'], @@ -171,105 +200,107 @@ def __init__(self, config, tokenizer): self.layers = nn.ModuleList([ LlamaBlock( dim=self.hidden_size, - heads=self.num_heads, + heads=self.hidden_size // self.num_heads, num_kv_heads=self.num_kv_heads, head_dim=self.head_dim, ff_dim=self.intermediate_size, rms_norm_eps=self.rms_norm_eps, attention_dropout=self.attention_dropout, - use_bias=config.get('attention_bias', False) + use_bias=config.get('attention_bias', False), + rotary_pos_emb=self.rotary_pos_emb ) for _ in range(self.num_layers) ]) - self.norm = nn.LayerNorm(self.hidden_size, eps=self.rms_norm_eps) + self.norm = RMSNorm(self.hidden_size, eps=self.rms_norm_eps) self.to_logits = nn.Linear(self.hidden_size, self.vocab_size) def forward( self, input_ids: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, - pos_ids: Optional[torch.Tensor] = None, + position_ids: Optional[torch.Tensor] = None, + cache_position: Optional[torch.LongTensor] = None, past_kv_cache: Optional[KVCache] = None, - return_hidden_states: bool = False - ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor]], KVCache]: + ) -> Tuple[Optional[torch.Tensor], Optional[Tuple[torch.Tensor]], KVCache]: """ Forward pass with integrated position ID handling, attention mask, and optional KVCache. Args: input_ids (torch.Tensor): Input token IDs of shape (batch_size, seq_len). attention_mask (Optional[torch.Tensor]): Attention mask of shape (batch_size, seq_len). - pos_ids (Optional[torch.Tensor]): Position IDs. If None, they are calculated automatically. + position_ids (Optional[torch.Tensor]): Position IDs. If None, they are calculated automatically. + cache_position (Optional[torch.LongTensor]): the positions of inputs in the sequence past_kv_cache (Optional[KVCache]): Optional KVCache for efficient generation. If provided, it stores past key-value states for faster autoregressive inference. - return_hidden_states (bool): Whether to return hidden states from each layer. Returns: Tuple[torch.Tensor, Optional[Tuple[torch.Tensor]], KVCache]: - - logits (torch.Tensor): Output logits of shape (batch_size, seq_len, vocab_size). - - hidden_states (Optional[Tuple[torch.Tensor]]): Hidden states from each layer, if return_hidden_states is True. + - logits (Optional[torch.Tensor]): Output logits of shape (batch_size, seq_len, vocab_size). + - hidden_states (Optional[torch.Tensor]): Hidden states from each layer - past_kv_cache (KVCache): Updated KVCache object. 
""" batch_size, seq_len = input_ids.shape # Create initial embeddings - x = self.embed(input_ids) + input_embeds = self.embed(input_ids) + + # Initialize or use the provided KVCache + if past_kv_cache is None: + past_kv_cache = KVCache( + batch_size=batch_size, + max_seq_len=self.max_position_embeddings, + num_heads=self.num_heads, + head_dim=self.head_dim, + dtype=x.dtype + ) # Initialize position IDs if not provided - if pos_ids is None: + if cache_position is None: past_seen_tokens = past_kv_cache.size if past_kv_cache is not None else 0 - pos_ids = torch.arange( + cache_position = torch.arange( past_seen_tokens, past_seen_tokens + seq_len, device=input_ids.device ).unsqueeze(0).expand(batch_size, -1) - # Reshape x to prepare for rotary embeddings: (batch_size, seq_len, num_heads, head_dim) - x = x.view(batch_size, seq_len, self.num_heads, self.head_dim) + if position_ids is None: + position_ids = cache_position.unsqueeze(0) + + hidden_states = input_embeds # Apply rotary positional embeddings - x = self.rotary_pos_emb( - x=x, - input_pos=pos_ids + position_embeddings = self.rotary_pos_emb( + hidden_states, + input_pos=position_ids ) - # Reshape x back to original shape: (batch_size, seq_len, hidden_size) - x = x.view(batch_size, seq_len, self.hidden_size) - - # Initialize or use the provided KVCache - if past_kv_cache is None: - past_kv_cache = KVCache( - batch_size=batch_size, - max_seq_len=self.max_position_embeddings, - num_heads=self.num_heads, - head_dim=self.head_dim, - dtype=x.dtype - ) - # Apply attention mask if provided (convert to appropriate format) if attention_mask is not None: attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) # Shape: (batch_size, 1, 1, seq_len) attention_mask = (1.0 - attention_mask) * -1e4 # Convert to large negative values - # Track hidden states if required - hidden_states = [] - # Forward pass through layers with KVCache - for layer_idx, layer in enumerate(self.layers): - x, k_val, v_val = layer(x, past_kv_cache, layer_idx, attention_mask) - - # Update KVCache - past_kv_cache.update(k_val, v_val) + for layer_idx in range(self.shard.end_layer, self.shard.start_layer): + layer_hidden_state, layer_kv_cache = layer( + hidden_states=hidden_states, + kv_cache=past_kv_cache, + attention_mask=attention_mask, + position_ids=position_ids, + position_embeddings=position_embeddings + ) - if return_hidden_states: - hidden_states.append(x) + hidden_states = layer_hidden_state # Apply final layer normalization - x = self.norm(x) + hidden_states = self.norm(hidden_states) - # Compute logits - logits = self.to_logits(x) + # Compute logits if at end layer + if self.shard.is_last_layer(): + logits = self.to_logits(hidden_states) + else: + logits = None # Prepare the return values if return_hidden_states: - return logits, tuple(hidden_states), past_kv_cache + return logits, hidden_states, past_kv_cache else: return logits, None, past_kv_cache diff --git a/exo/inference/torch/tests/test_safetensor_shard.py b/exo/inference/torch/tests/test_safetensor_shard.py index ef72ef580..dd84ff18d 100644 --- a/exo/inference/torch/tests/test_safetensor_shard.py +++ b/exo/inference/torch/tests/test_safetensor_shard.py @@ -17,7 +17,7 @@ async def main(): # Create a Shard object shard = Shard( - model_id="meta-llama/Llama-3.2-1B-Instruct", + model_id="unsloth/Meta-Llama-3.1-8B-Instruct", start_layer=start_layer, end_layer=end_layer-1, n_layers=32 @@ -42,7 +42,9 @@ async def main(): model = AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=shard.model_id, 
local_files_only=True, - num_hidden_layers=shard.end_layer - shard.start_layer + num_hidden_layers=shard.end_layer - shard.start_layer, + #device_map="auto", + torch_dtype="float16" ).to("cuda") tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") @@ -61,7 +63,7 @@ async def main(): print(f"model_inputs:\n{model_inputs}") - tensor_shard.restore_backup() + tensor_shard.restore_backups() if __name__ == "__main__": asyncio.run(main()) From 1512d13b52e87636c741c07fe79383cd46cdc891 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 25 Oct 2024 02:18:42 -0800 Subject: [PATCH 463/589] updated llamablock and llamamodel, created a MLP helper class to use in other transformers, added in torch based 4d linear transformation causal mask, need to add in device map or just using device as running out of ram, think other issues at play as should not run out of ram before getting to hidden layers --- exo/inference/torch/models/hf.py | 5 +- exo/inference/torch/models/llama3.py | 183 +++++++++--------- exo/inference/torch/models/llm_utils.py | 129 ++++++++++++ .../torch/tests/test_llama3_model.py | 32 ++- 4 files changed, 246 insertions(+), 103 deletions(-) diff --git a/exo/inference/torch/models/hf.py b/exo/inference/torch/models/hf.py index f15d5d19f..5d5b03e40 100644 --- a/exo/inference/torch/models/hf.py +++ b/exo/inference/torch/models/hf.py @@ -1,5 +1,3 @@ -import os -import json from typing import Tuple, Optional, Union, List from pathlib import Path @@ -8,8 +6,7 @@ from exo.inference.shard import Shard from exo.helpers import DEBUG -from exo.inference.torch.utils import extract_layers -from exo.inference.torch.model.hf_safe_tensor_shard import HFSafeTensorShard +from exo.inference.torch.models.hf_safe_tensor_shard import HFSafeTensorShard from transformers import ( AutoModelForCausalLM, diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 23e04958e..94607388a 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -3,7 +3,6 @@ Written with pytorch using torchtune and other methods """ -import math from typing import Optional, Tuple import torch @@ -16,6 +15,7 @@ ) from exo.inference.shard import Shard +from exo.inference.torch.models.llm_utils import MLP, create_4d_causal_attention_mask class LlamaBlock(nn.Module): """ @@ -36,7 +36,6 @@ def __init__( pos_embeddings=None ): super(LlamaBlock, self).__init__() - # Class vars self.dim = dim self.heads = heads self.num_kv_heads = num_kv_heads @@ -48,45 +47,19 @@ def __init__( self.max_seq_len = max_seq_len self.pos_embeddings = pos_embeddings self.rotary_pos_emb = rotary_pos_emb - - # Define linear projections for Q, K, V, and Output self.q_proj = nn.Linear(dim, heads * head_dim, bias=use_bias) self.k_proj = nn.Linear(dim, num_kv_heads * head_dim, bias=use_bias) self.v_proj = nn.Linear(dim, num_kv_heads * head_dim, bias=use_bias) self.output_proj = nn.Linear(heads * head_dim, dim, bias=use_bias) - - # Define optional query normalization - self.q_norm = nn.LayerNorm(head_dim, eps=rms_norm_eps) - - # MultiHeadAttention from torchtune - self.attn = MultiHeadAttention( - embed_dim=dim, - num_heads=heads, - num_kv_heads=num_kv_heads, - head_dim=head_dim, - q_proj=self.q_proj, - k_proj=self.k_proj, - v_proj=self.v_proj, - output_proj=self.output_proj, - pos_embeddings=pos_embeddings, - q_norm=self.q_norm, - k_norm=self.q_norm, - kv_cache=None, - max_seq_len=max_seq_len, - is_causal=True, - attn_dropout=attention_dropout - ) - - # RMSNorm layers before and 
after attention and feed-forward layers - self.norm1 = nn.LayerNorm(dim, eps=rms_norm_eps) - self.norm2 = nn.LayerNorm(dim, eps=rms_norm_eps) - - # Feed-forward layer with SwiGLU activation - self.feed_forward = nn.Sequential( - nn.Linear(dim, ff_dim), - nn.GLU(), # SwiGLU approximation - nn.Linear(ff_dim // 2, dim) + self.q_norm = RMSNorm(head_dim, eps=rms_norm_eps) + self.mlp = MLP( + input_dim=dim, + hidden_dims=[ff_dim], # Single hidden layer with ff_dim as the hidden size + output_dim=dim, + activation='gelu', + dropout=attention_dropout ) + self.post_norm = RMSNorm(dim, eps=rms_norm_eps) def forward( self, @@ -94,62 +67,60 @@ def forward( kv_cache: Optional[KVCache] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.Tensor] = None, - position_embeddings: Optional[torch.FloatTensor] = None - ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + ) -> Tuple[torch.Tensor, Optional[KVCache]]: """ - Forward pass with integrated attention and key-value caching. + Forward pass with integrated attention, resnet and key-value caching. Args: - x (torch.Tensor): Input tensor of shape (batch_size, seq_len, dim). + hidden_states (torch.Tensor): Input tensor of shape (batch_size, seq_len, dim). kv_cache (Optional[KVCache]): KVCache object for managing past key-value states. attention_mask (Optional[torch.Tensor]): Attention mask of shape (batch_size, 1, 1, seq_len). position_ids (Optional[torch.Tensor]): Position IDs tensor of shape (batch_size, seq_len). Returns: Tuple[torch.Tensor, KVCache]: - - x (torch.Tensor): Output tensor of shape (batch_size, seq_len, dim). - - kv_cache (KVCache): Updated KVCache object. + - Output tensor of shape (batch_size, seq_len, dim). + - Updated KVCache object. """ - batch_size, seq_len, _ = hidden_states.shape - attn_output, attn_weights = None - - # Do kvq projection - query_states = self.q_proj(hidden_states) - key_states = self.k_proj(hidden_states) - value_states = self.v_proj(hidden_states) - - # Reshape - query_states = query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) - key_states = key_states.view(batch_size, seq_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) - value_states = value_states.view(batch_size, seq_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) - - # Initialize or update KVCache - if kv_cache is None: - kv_cache = KVCache( - batch_size=batch_size, - max_seq_len=self.attn.max_seq_len, - num_heads=self.heads, - head_dim=self.attn.head_dim, - dtype=hidden_states.dtype - ) - - # cache - value_states = kv_cache.update(key_states, value_states) - - # Attention weights and causal mask - attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim) - - if attention_mask is not None: - causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] - attn_weights = attn_weights + causal_mask - - attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) - attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training) - attn_output = torch.matmul(attn_weights, value_states) + # setting up resnet + residual = hidden_states + + # Apply RMSNorm to input + hidden_states = self.input_norm(hidden_states) + + # Apply MultiHeadAttention with KVCache + hidden_states = MultiHeadAttention( + embed_dim=self.dim, + num_heads=self.heads, + num_kv_heads=self.num_kv_heads, + head_dim=self.head_dim, + q_proj=self.q_proj, + 
k_proj=self.k_proj, + v_proj=self.v_proj, + output_proj=self.output_proj, + pos_embeddings=self.rotary_pos_emb, + q_norm=self.q_norm, + k_norm=self.q_norm, + kv_cache=kv_cache, # Passed during the forward call + max_seq_len=self.max_seq_len, + is_causal=True, + attn_dropout=self.attention_dropout + )( + x=hidden_states, + mask=attention_mask, + input_pos=position_ids + ) - + # Residual connection + hidden_states = residual + hidden_states + residual = hidden_states + # Post attention normalization + hidden_states = self.post_norm(hidden_states) + # Feed-forward network with MLP and residual connection + hidden_states = self.mlp(hidden_states) + hidden_states = hidden_states + residual - return attn_output, attn_weights, kv_cache + return hidden_states, kv_cache class LlamaModel(nn.Module): """ @@ -188,7 +159,8 @@ def __init__(self, config: dict, shard: Shard): self.rms_norm_eps = config['rms_norm_eps'] self.head_dim = config['head_dim'] self.attention_dropout = config.get('attention_dropout', 0.0) - self.padding_idx = config["pad_token_id"] + self.padding_idx = config.get("pad_token_id") + self.device_map="any" # Model layers self.embed = nn.Embedding(self.vocab_size, self.hidden_size, self.padding_idx) @@ -220,7 +192,7 @@ def forward( position_ids: Optional[torch.Tensor] = None, cache_position: Optional[torch.LongTensor] = None, past_kv_cache: Optional[KVCache] = None, - ) -> Tuple[Optional[torch.Tensor], Optional[Tuple[torch.Tensor]], KVCache]: + ) -> Tuple[Optional[torch.Tensor], Optional[Tuple[torch.Tensor]], Optional[KVCache]]: """ Forward pass with integrated position ID handling, attention mask, and optional KVCache. @@ -250,7 +222,7 @@ def forward( max_seq_len=self.max_position_embeddings, num_heads=self.num_heads, head_dim=self.head_dim, - dtype=x.dtype + dtype=input_embeds.dtype ) # Initialize position IDs if not provided @@ -267,28 +239,60 @@ def forward( hidden_states = input_embeds + # Reshape hidden_states to (batch_size, seq_len, num_heads, head_dim) + batch_size, seq_len, _ = hidden_states.shape + hidden_states = hidden_states.view(batch_size, seq_len, self.num_heads, self.head_dim) + + # Reshape position_ids to match (batch_size, seq_len) + if position_ids.dim() != 2: + position_ids = position_ids.squeeze(0) + + print(f"hidden_states: {hidden_states.shape}") + print(f"position_ids: {position_ids.shape}") + # Apply rotary positional embeddings position_embeddings = self.rotary_pos_emb( hidden_states, input_pos=position_ids ) - # Apply attention mask if provided (convert to appropriate format) + print(f"position_embeddings: {position_embeddings.shape}") + + # Reshape back to (batch_size, seq_len, hidden_size) + hidden_states = hidden_states.view(batch_size, seq_len, self.hidden_size) + print(f"hidden_states: {hidden_states.shape}") + + # create 4d causal mask + causal_mask = None if attention_mask is not None: - attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) # Shape: (batch_size, 1, 1, seq_len) - attention_mask = (1.0 - attention_mask) * -1e4 # Convert to large negative values + causal_mask = create_4d_causal_attention_mask( + attention_mask=attention_mask, + seq_len=hidden_states.size(1), + target_len=self.max_position_embeddings, + dtype=hidden_states.dtype, + device=hidden_states.device, + cache_pos=torch.arange(self.max_position_embeddings, device=hidden_states.device), + batch_size=hidden_states.size(0) + ) + + print(f"attention_mask: {attention_mask.shape}") + print(f"causal_mask: {causal_mask.shape}") # Forward pass through layers with KVCache for layer_idx 
in range(self.shard.end_layer, self.shard.start_layer): - layer_hidden_state, layer_kv_cache = layer( + print(f"forward layer #{layer_idx}") + encoder_layer = self.layers[layer_idx] + print(f"encoder_layer\n{encoder_layer}") + layer_hidden_state, layer_kv_cache = self.layers[layer_idx]( hidden_states=hidden_states, kv_cache=past_kv_cache, - attention_mask=attention_mask, + attention_mask=causal_mask, position_ids=position_ids, position_embeddings=position_embeddings ) hidden_states = layer_hidden_state + past_kv_cache = layer_kv_cache # Apply final layer normalization hidden_states = self.norm(hidden_states) @@ -299,8 +303,7 @@ def forward( else: logits = None - # Prepare the return values - if return_hidden_states: + if logits is None: return logits, hidden_states, past_kv_cache else: return logits, None, past_kv_cache diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 89057021d..22f224857 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -5,6 +5,7 @@ from pathlib import Path import torch +import torch.nn as nn import torch.nn.functional as F from exo.helpers import DEBUG @@ -89,4 +90,132 @@ def select_next_token( return next_token.squeeze(-1) +class MLP(nn.Module): + def __init__(self, input_dim, hidden_dims, output_dim, activation='gelu', dropout=0.0, use_batchnorm=False): + """ + General MLP (Multi-Layer Perceptron) module. + + Args: + input_dim (int): Dimensionality of the input. + hidden_dims (list of int): List of hidden layer dimensions. + output_dim (int): Dimensionality of the output. + activation (str): Activation function ('relu', 'gelu', 'tanh', 'sigmoid', etc.). + dropout (float): Dropout probability. + use_batchnorm (bool): Whether to use batch normalization. + """ + super(MLP, self).__init__() + + self.layers = nn.ModuleList() + self.use_batchnorm = use_batchnorm + + # Activation function mapping + activations = { + 'relu': nn.ReLU(), + 'gelu': nn.GELU(), + 'tanh': nn.Tanh(), + 'sigmoid': nn.Sigmoid(), + 'leaky_relu': nn.LeakyReLU(0.2) + } + + # Ensure valid activation + if activation not in activations: + raise ValueError(f"Invalid activation: {activation}. Choose from {list(activations.keys())}") + + self.activation = activations[activation] + + # Construct MLP layers + prev_dim = input_dim + for h_dim in hidden_dims: + self.layers.append(nn.Linear(prev_dim, h_dim)) + if use_batchnorm: + self.layers.append(nn.BatchNorm1d(h_dim)) + self.layers.append(self.activation) + if dropout > 0: + self.layers.append(nn.Dropout(dropout)) + prev_dim = h_dim + + # Output layer + self.output_layer = nn.Linear(prev_dim, output_dim) + + def forward(self, x): + """ + Forward pass for the MLP module. + + Args: + x (torch.Tensor): Input tensor. + + Returns: + torch.Tensor: Output tensor after the MLP transformations. + """ + for layer in self.layers: + x = layer(x) + return self.output_layer(x) + +def create_4d_causal_attention_mask( + attention_mask: torch.Tensor, + seq_len: int, + target_len: int, + dtype: torch.dtype, + device: torch.device, + cache_pos: torch.Tensor, + batch_size: int, +) -> torch.Tensor: + """ + Creates a 4D causal attention mask from a 2D mask, with adjustments for static caching. + + Args: + attention_mask (torch.Tensor): + A 2D tensor of shape (batch_size, key_value_length) or a 4D tensor of shape + (batch_size, 1, query_length, key_value_length). + seq_len (int): + Sequence length of the input being processed. 
+ target_len (int): + Target length to generate the causal mask. + dtype (torch.dtype): + Data type for the causal mask. + device (torch.device): + Device to place the causal mask on. + cache_pos (torch.Tensor): + Cache position indices indicating the position of the input tokens in the sequence. + batch_size (int): + Number of samples in the batch. + + Returns: + torch.Tensor: + A 4D causal mask of shape (batch_size, 1, query_length, key_value_length). + """ + if attention_mask is not None and attention_mask.dim() == 4: + # If the mask is already 4D, return it directly + return attention_mask + + min_value = torch.finfo(dtype).min + + # Create a 2D causal mask of shape (seq_len, target_len) + causal_mask = torch.full( + (seq_len, target_len), fill_value=min_value, dtype=dtype, device=device + ) + + if seq_len != 1: + # Mask positions after the current position + causal_mask = torch.triu(causal_mask, diagonal=1) + + # Adjust causal mask for cache position + causal_mask *= (torch.arange(target_len, device=device) > cache_pos.view(-1, 1)) + + # Expand to 4D and batch size + causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1) + + if attention_mask is not None: + # Create a padding mask based on the input attention_mask + mask_len = attention_mask.shape[-1] + causal_mask = causal_mask.clone() # Ensure contiguous memory for in-place operations + padding_mask = causal_mask[:, :, :, :mask_len] + attention_mask[:, None, None, :] + padding_mask = padding_mask == 0 + + # Apply padding to the causal mask + causal_mask[:, :, :, :mask_len] = causal_mask[:, :, :, :mask_len].masked_fill( + padding_mask, min_value + ) + + return causal_mask diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index 25485b11b..13d50f8d2 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -9,8 +9,8 @@ from safetensors.torch import load_file as load_safetensors from exo.inference.torch.models.llm_utils import load_model_config, select_next_token from exo.inference.torch.models.llama3 import LlamaModel, KVCache +from exo.inference.shard import Shard -# Constants MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct" # Get the path to the model files from the Hugging Face cache @@ -20,11 +20,21 @@ # Load model configuration config = load_model_config(cache_dir / "config.json") +print(f"current config\n{config}") + +# Setup shard +shard = Shard( + model_id=MODEL_NAME, + start_layer=0, + end_layer=int(config["num_hidden_layers"]) - 1, + n_layers=int(config["num_hidden_layers"]) +) + # Initialize tokenizer tokenizer = AutoTokenizer.from_pretrained(cache_dir) # Initialize LlamaModel with config and tokenizer -model = LlamaModel(config, tokenizer) +model = LlamaModel(config, shard) # Load weights from safetensors files in the cache directory safetensors_files = list(cache_dir.glob("*.safetensors")) @@ -48,9 +58,12 @@ def test_forward_pass(model, tokenizer, text): """ # Tokenize input text inputs = tokenizer(text, return_tensors="pt") - input_ids = inputs["input_ids"] + input_ids = inputs.get("input_ids") attention_mask = inputs.get("attention_mask") + print(f"input_ids: {input_ids}") + print(f"attention_mask: {attention_mask}") + # Initialize KVCache past_kv_cache = KVCache( batch_size=input_ids.size(0), @@ -62,17 +75,18 @@ def test_forward_pass(model, tokenizer, text): # Forward pass with KVCache with torch.no_grad(): - logits, hidden_states, _ = model( + logits, hidden_states, past_kv_cache = model( 
input_ids, attention_mask=attention_mask, - pos_ids=None, - past_kv_cache=past_kv_cache, - return_hidden_states=True + position_ids=None, + past_kv_cache=past_kv_cache ) # Print logits shape and hidden state information - print(f"Logits shape: {logits.shape}") - if hidden_states: + if logits is not None: + print(f"Logits shape: {logits.shape}") + + if hidden_states is not None: print(f"Number of hidden states: {len(hidden_states)}") print(f"Shape of last hidden state: {hidden_states[-1].shape}") From 0eb80448fd1714f261f64d2282f05bbd45654b3e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 25 Oct 2024 03:07:34 -0800 Subject: [PATCH 464/589] fixing causual mask loading error, updated testing, working on logit selection issues producing gibberish --- exo/inference/torch/models/llama3.py | 6 +- .../torch/tests/test_llama3_model.py | 130 +++++++----------- 2 files changed, 51 insertions(+), 85 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 94607388a..954df4a1f 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -267,11 +267,11 @@ def forward( if attention_mask is not None: causal_mask = create_4d_causal_attention_mask( attention_mask=attention_mask, - seq_len=hidden_states.size(1), - target_len=self.max_position_embeddings, + seq_len=hidden_states.shape[1], + target_len=attention_mask.shape[-1], dtype=hidden_states.dtype, device=hidden_states.device, - cache_pos=torch.arange(self.max_position_embeddings, device=hidden_states.device), + cache_pos=cache_position, batch_size=hidden_states.size(0) ) diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index 13d50f8d2..6308335be 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -4,6 +4,7 @@ from pathlib import Path import torch +import torchtune.generation as ttg from transformers import AutoTokenizer from huggingface_hub import snapshot_download from safetensors.torch import load_file as load_safetensors @@ -12,83 +13,9 @@ from exo.inference.shard import Shard MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct" +TEMP=0.7 +TOP_K=25 -# Get the path to the model files from the Hugging Face cache -cache_dir = Path(snapshot_download(MODEL_NAME)) -print(f"Cache directory: {cache_dir}") - -# Load model configuration -config = load_model_config(cache_dir / "config.json") - -print(f"current config\n{config}") - -# Setup shard -shard = Shard( - model_id=MODEL_NAME, - start_layer=0, - end_layer=int(config["num_hidden_layers"]) - 1, - n_layers=int(config["num_hidden_layers"]) -) - -# Initialize tokenizer -tokenizer = AutoTokenizer.from_pretrained(cache_dir) - -# Initialize LlamaModel with config and tokenizer -model = LlamaModel(config, shard) - -# Load weights from safetensors files in the cache directory -safetensors_files = list(cache_dir.glob("*.safetensors")) -if not safetensors_files: - raise FileNotFoundError("No safetensors files found in the cache directory.") - -# Load weights from each found safetensors file -for safetensor_file in safetensors_files: - print(f"Loading weights from: {safetensor_file}") - state_dict = load_safetensors(safetensor_file) - model.load_state_dict(state_dict, strict=False) - -model.eval() # Set the model to evaluation mode - -# Sample text for testing -test_text = "Once upon a time," - -def test_forward_pass(model, tokenizer, text): - """ - Test the forward pass of the LlamaModel with given input text. 
- """ - # Tokenize input text - inputs = tokenizer(text, return_tensors="pt") - input_ids = inputs.get("input_ids") - attention_mask = inputs.get("attention_mask") - - print(f"input_ids: {input_ids}") - print(f"attention_mask: {attention_mask}") - - # Initialize KVCache - past_kv_cache = KVCache( - batch_size=input_ids.size(0), - max_seq_len=model.max_position_embeddings, - num_heads=model.num_heads, - head_dim=model.head_dim, - dtype=input_ids.dtype - ) - - # Forward pass with KVCache - with torch.no_grad(): - logits, hidden_states, past_kv_cache = model( - input_ids, - attention_mask=attention_mask, - position_ids=None, - past_kv_cache=past_kv_cache - ) - - # Print logits shape and hidden state information - if logits is not None: - print(f"Logits shape: {logits.shape}") - - if hidden_states is not None: - print(f"Number of hidden states: {len(hidden_states)}") - print(f"Shape of last hidden state: {hidden_states[-1].shape}") def test_generation(model, tokenizer, text, max_length=50): """ @@ -96,7 +23,7 @@ def test_generation(model, tokenizer, text, max_length=50): """ # Tokenize input text inputs = tokenizer(text, return_tensors="pt") - input_ids = inputs["input_ids"] + input_ids = inputs.get("input_ids") attention_mask = inputs.get("attention_mask") # Initialize KVCache for caching @@ -121,7 +48,9 @@ def test_generation(model, tokenizer, text, max_length=50): ) # Select next token using logits - next_token = select_next_token(logits, top_k=50, top_p=0.9, temperature=0.7, use_max=False) + #next_token = select_next_token(logits, top_k=50, top_p=0.9, temperature=0.7, use_max=False) + next_token = ttg.sample(logits[:, -1, :].clone().float(), temperature=TEMP, top_k=TOP_K).squeeze(-1) + print(f"next_token: {next_token}") # Update generated_ids generated_ids = torch.cat([generated_ids, next_token.unsqueeze(0)], dim=1) @@ -132,12 +61,49 @@ def test_generation(model, tokenizer, text, max_length=50): # Decode generated text generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True) - print(f"Generated text: {generated_text}") + print(f"\nPrompt: {text}") + print(f"\nGenerated Response: {generated_text}") if __name__ == "__main__": - print("Testing forward pass:") - test_forward_pass(model, tokenizer, test_text) - print("\nTesting generation:") + # Get the path to the model files from the Hugging Face cache + cache_dir = Path(snapshot_download(MODEL_NAME)) + print(f"Cache directory: {cache_dir}") + + # Load model configuration + config = load_model_config(cache_dir / "config.json") + + print(f"current config\n{config}") + + # Setup shard + shard = Shard( + model_id=MODEL_NAME, + start_layer=0, + end_layer=int(config["num_hidden_layers"]) - 1, + n_layers=int(config["num_hidden_layers"]) + ) + + # Initialize tokenizer + tokenizer = AutoTokenizer.from_pretrained(cache_dir) + + # Initialize LlamaModel with config and tokenizer + model = LlamaModel(config, shard) + + # Load weights from safetensors files in the cache directory + safetensors_files = list(cache_dir.glob("*.safetensors")) + if not safetensors_files: + raise FileNotFoundError("No safetensors files found in the cache directory.") + + # Load weights from each found safetensors file + for safetensor_file in safetensors_files: + print(f"Loading weights from: {safetensor_file}") + state_dict = load_safetensors(safetensor_file) + model.load_state_dict(state_dict, strict=False) + + model.eval() # Set the model to evaluation mode + + # Sample text for testing + test_text = "What color is a red apple?" 
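+  # Run iterative, token-by-token generation over the fully sharded model (layers 0 through n_layers - 1)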
+ test_generation(model, tokenizer, test_text) From a6768b4717fa095268ce9e7b0f95abf7a6f1e7e1 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 25 Oct 2024 03:43:11 -0800 Subject: [PATCH 465/589] adding a chat temple from tokenizer to test, looking at padding ids to see if they is causing more gibberish, looking into better logit sampling and looking over other generation setup --- exo/inference/torch/models/llama3.py | 5 ++--- .../torch/tests/test_llama3_model.py | 22 +++++++++++++++---- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 954df4a1f..60d08426a 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -160,7 +160,6 @@ def __init__(self, config: dict, shard: Shard): self.head_dim = config['head_dim'] self.attention_dropout = config.get('attention_dropout', 0.0) self.padding_idx = config.get("pad_token_id") - self.device_map="any" # Model layers self.embed = nn.Embedding(self.vocab_size, self.hidden_size, self.padding_idx) @@ -183,7 +182,7 @@ def __init__(self, config: dict, shard: Shard): ) for _ in range(self.num_layers) ]) self.norm = RMSNorm(self.hidden_size, eps=self.rms_norm_eps) - self.to_logits = nn.Linear(self.hidden_size, self.vocab_size) + self.lm_head = nn.Linear(self.hidden_size, self.vocab_size, bias=False) def forward( self, @@ -299,7 +298,7 @@ def forward( # Compute logits if at end layer if self.shard.is_last_layer(): - logits = self.to_logits(hidden_states) + logits = self.lm_head(hidden_states[:, -1:, :]) else: logits = None diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index 6308335be..6537115bf 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -14,18 +14,31 @@ MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct" TEMP=0.7 -TOP_K=25 +TOP_K=35 +TOP_P=0.9 -def test_generation(model, tokenizer, text, max_length=50): +def test_generation(model, tokenizer, text, max_length=10): """ Test the generation capabilities of the LlamaModel with sample text. 
""" # Tokenize input text - inputs = tokenizer(text, return_tensors="pt") + prompt = tokenizer.apply_chat_template([ + { + "role": "user", + "content": text + } + ], tokenize=False, add_generation_prompt=True) + + print(f"prompt: {prompt}") + + inputs = tokenizer(prompt, return_tensors="pt") input_ids = inputs.get("input_ids") attention_mask = inputs.get("attention_mask") + print(f"input_ids: {input_ids}") + print(f"attention_mask: {attention_mask}") + # Initialize KVCache for caching past_kv_cache = KVCache( batch_size=input_ids.size(0), @@ -48,12 +61,13 @@ def test_generation(model, tokenizer, text, max_length=50): ) # Select next token using logits - #next_token = select_next_token(logits, top_k=50, top_p=0.9, temperature=0.7, use_max=False) + #next_token = select_next_token(logits, top_k=TOP_K, top_p=TOP_P, temperature=TEMP, use_max=False) next_token = ttg.sample(logits[:, -1, :].clone().float(), temperature=TEMP, top_k=TOP_K).squeeze(-1) print(f"next_token: {next_token}") # Update generated_ids generated_ids = torch.cat([generated_ids, next_token.unsqueeze(0)], dim=1) + print(f"generated_ids: {generated_ids}") # Check for EOS token if next_token.item() == tokenizer.eos_token_id: From 8ba24e2bc8fd5830237b4b0387628ff0d809a776 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 26 Oct 2024 03:45:29 -0800 Subject: [PATCH 466/589] fixing parameter defintion on 4d mask method, commiting before trying to upgrade to main fork --- exo/inference/torch/models/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 22f224857..10eabc740 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -157,7 +157,7 @@ def create_4d_causal_attention_mask( target_len: int, dtype: torch.dtype, device: torch.device, - cache_pos: torch.Tensor, + cache_pos: torch.LongTensor, batch_size: int, ) -> torch.Tensor: """ From cfb10ba39e196a515e6be87bffce9d628e44a0df Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 28 Oct 2024 01:16:40 -0800 Subject: [PATCH 467/589] added in more base llm functions like multiheadattention and rotate embed, working on llama model and getting shapes/reshaping right, running into kv cache issue --- exo/inference/torch/README.md | 3 + exo/inference/torch/models/llama3.py | 139 +++++---- exo/inference/torch/models/llm_utils.py | 285 ++++++++++++++---- .../torch/tests/test_llama3_model.py | 17 +- 4 files changed, 316 insertions(+), 128 deletions(-) diff --git a/exo/inference/torch/README.md b/exo/inference/torch/README.md index da67faa28..43b3782af 100644 --- a/exo/inference/torch/README.md +++ b/exo/inference/torch/README.md @@ -17,6 +17,9 @@ Still working on split model development (see test_split_model.py). Right now, i ### 10/21/2024 Working on removing transformers due to inference and VRAM usage [issues](https://github.com/exo-explore/exo/pull/139#issuecomment-2424953962). Creating a pure pytorch implementation of llama3 as using transformers wont work for exo. Using some code from meta but also implementing the use of torchtune. +### 10/27/2024 +Still working on llama3 model but wanted to note that a better KVCache needs to be investigated. 
+ ## Tech Tested on diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 60d08426a..bf2e6f16e 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -8,14 +8,17 @@ import torch import torch.nn as nn from torchtune.modules import ( - MultiHeadAttention, - RotaryPositionalEmbeddings, KVCache, - RMSNorm + RMSNorm, ) from exo.inference.shard import Shard -from exo.inference.torch.models.llm_utils import MLP, create_4d_causal_attention_mask +from exo.inference.torch.models.llm_utils import ( + MultiLayerPreceptron, + MultiHeadAttention, + RotaryEmbedding, + create_4d_causal_attention_mask +) class LlamaBlock(nn.Module): """ @@ -24,12 +27,13 @@ class LlamaBlock(nn.Module): def __init__( self, dim, - heads, - num_kv_heads, head_dim, + num_heads, + num_kv_heads, ff_dim, - rms_norm_eps, rotary_pos_emb, + mlp, + rms_norm_eps=1e-6, attention_dropout=0.0, use_bias=False, max_seq_len=4096, @@ -37,9 +41,9 @@ def __init__( ): super(LlamaBlock, self).__init__() self.dim = dim - self.heads = heads - self.num_kv_heads = num_kv_heads self.head_dim = head_dim + self.num_heads = num_heads + self.num_kv_heads = num_kv_heads self.ff_dim = ff_dim self.rms_norm_eps = rms_norm_eps self.attention_dropout = attention_dropout @@ -47,26 +51,21 @@ def __init__( self.max_seq_len = max_seq_len self.pos_embeddings = pos_embeddings self.rotary_pos_emb = rotary_pos_emb - self.q_proj = nn.Linear(dim, heads * head_dim, bias=use_bias) + self.q_proj = nn.Linear(dim, num_heads * head_dim, bias=use_bias) self.k_proj = nn.Linear(dim, num_kv_heads * head_dim, bias=use_bias) self.v_proj = nn.Linear(dim, num_kv_heads * head_dim, bias=use_bias) - self.output_proj = nn.Linear(heads * head_dim, dim, bias=use_bias) - self.q_norm = RMSNorm(head_dim, eps=rms_norm_eps) - self.mlp = MLP( - input_dim=dim, - hidden_dims=[ff_dim], # Single hidden layer with ff_dim as the hidden size - output_dim=dim, - activation='gelu', - dropout=attention_dropout - ) - self.post_norm = RMSNorm(dim, eps=rms_norm_eps) + self.output_proj = nn.Linear(num_heads * head_dim, dim, bias=use_bias) + self.input_layer_norm = RMSNorm(dim, eps=rms_norm_eps) + self.mlp = mlp + self.post_attention_norm = RMSNorm(dim, eps=rms_norm_eps) def forward( self, hidden_states: torch.Tensor, + position_embeddings: Tuple[torch.Tensor, torch.Tensor], kv_cache: Optional[KVCache] = None, attention_mask: Optional[torch.Tensor] = None, - position_ids: Optional[torch.Tensor] = None, + position_ids: Optional[torch.Tensor] = None ) -> Tuple[torch.Tensor, Optional[KVCache]]: """ Forward pass with integrated attention, resnet and key-value caching. 
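The next hunk rewires LlamaBlock.forward around the injected self_attn and mlp modules, and the resulting control flow is the standard pre-norm residual layout (normalize, attend, add residual; normalize, MLP, add residual). A minimal sketch of that flow, written as a hypothetical helper that is not part of the patch, assuming activations of shape (batch_size, seq_len, hidden_size) and callables matching the block's input_layer_norm, self_attn, post_attention_norm and mlp:

def pre_norm_block_step(x, input_norm, self_attn, post_attn_norm, mlp, **attn_kwargs):
  # Attention sub-block: pre-normalize, attend, then add the residual back in
  residual = x
  h, kv_cache = self_attn(hidden_states=input_norm(x), **attn_kwargs)
  h = residual + h

  # Feed-forward sub-block: pre-normalize, apply the MLP, add the residual
  residual = h
  h = mlp(post_attn_norm(h))
  return residual + h, kv_cache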
@@ -86,36 +85,39 @@ def forward( residual = hidden_states # Apply RMSNorm to input - hidden_states = self.input_norm(hidden_states) + hidden_states = self.input_layer_norm(hidden_states) + print(f"self.input_layer_norm(hidden_states) {hidden_states.shape}") + + batch_size, seq_len, _ = hidden_states.shape + hidden_states = hidden_states.view(batch_size, seq_len, self.num_heads, self.head_dim).squeeze() + print(f"hidden_states: {hidden_states.shape}") # Apply MultiHeadAttention with KVCache - hidden_states = MultiHeadAttention( - embed_dim=self.dim, - num_heads=self.heads, + mh_attn = MultiHeadAttention( + hidden_size=self.head_dim, + num_heads=self.num_heads, num_kv_heads=self.num_kv_heads, head_dim=self.head_dim, - q_proj=self.q_proj, - k_proj=self.k_proj, - v_proj=self.v_proj, - output_proj=self.output_proj, - pos_embeddings=self.rotary_pos_emb, - q_norm=self.q_norm, - k_norm=self.q_norm, - kv_cache=kv_cache, # Passed during the forward call - max_seq_len=self.max_seq_len, + kv_cache=kv_cache, is_causal=True, - attn_dropout=self.attention_dropout - )( - x=hidden_states, - mask=attention_mask, - input_pos=position_ids + attention_dropout=self.attention_dropout, + rotary_emb=self.rotary_pos_emb + ) + + hidden_states = mh_attn( + hidden_states=hidden_states, + position_ids=position_ids, + attention_mask=attention_mask, + position_embeddings=position_embeddings ) # Residual connection hidden_states = residual + hidden_states residual = hidden_states + print(f"hidden_states: {hidden_states}") + print(f"residual: {residual}") # Post attention normalization - hidden_states = self.post_norm(hidden_states) + hidden_states = self.post_attention_norm(hidden_states) # Feed-forward network with MLP and residual connection hidden_states = self.mlp(hidden_states) hidden_states = hidden_states + residual @@ -161,24 +163,32 @@ def __init__(self, config: dict, shard: Shard): self.attention_dropout = config.get('attention_dropout', 0.0) self.padding_idx = config.get("pad_token_id") - # Model layers + # Model layers and methods self.embed = nn.Embedding(self.vocab_size, self.hidden_size, self.padding_idx) - self.rotary_pos_emb = RotaryPositionalEmbeddings( - self.hidden_size // self.num_heads, + self.rotary_pos_emb = RotaryEmbedding( + self.head_dim, config['rope_scaling']['original_max_position_embeddings'], config['rope_theta'] ) + self.mlp = MultiLayerPreceptron( + input_dim=self.hidden_size, + hidden_dims=[self.intermediate_size], # Single hidden layer with ff_dim as the hidden size + output_dim=self.hidden_size, + activation='gelu', + dropout=self.attention_dropout + ) self.layers = nn.ModuleList([ LlamaBlock( dim=self.hidden_size, - heads=self.hidden_size // self.num_heads, + head_dim=self.hidden_size // self.num_heads, + num_heads=self.num_heads, num_kv_heads=self.num_kv_heads, - head_dim=self.head_dim, ff_dim=self.intermediate_size, rms_norm_eps=self.rms_norm_eps, attention_dropout=self.attention_dropout, use_bias=config.get('attention_bias', False), - rotary_pos_emb=self.rotary_pos_emb + rotary_pos_emb=self.rotary_pos_emb, + mlp=self.mlp ) for _ in range(self.num_layers) ]) self.norm = RMSNorm(self.hidden_size, eps=self.rms_norm_eps) @@ -189,7 +199,7 @@ def forward( input_ids: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.Tensor] = None, - cache_position: Optional[torch.LongTensor] = None, + cache_position: Optional[torch.Tensor] = None, past_kv_cache: Optional[KVCache] = None, ) -> Tuple[Optional[torch.Tensor], Optional[Tuple[torch.Tensor]], 
Optional[KVCache]]: """ @@ -214,15 +224,15 @@ def forward( # Create initial embeddings input_embeds = self.embed(input_ids) - # Initialize or use the provided KVCache - if past_kv_cache is None: - past_kv_cache = KVCache( - batch_size=batch_size, - max_seq_len=self.max_position_embeddings, - num_heads=self.num_heads, - head_dim=self.head_dim, - dtype=input_embeds.dtype - ) + ## Initialize or use the provided KVCache + #if past_kv_cache is None: + # past_kv_cache = KVCache( + # batch_size=batch_size, + # max_seq_len=self.max_position_embeddings, + # num_heads=self.num_heads, + # head_dim=self.head_dim, + # dtype=input_embeds.dtype + # ) # Initialize position IDs if not provided if cache_position is None: @@ -231,11 +241,13 @@ def forward( past_seen_tokens, past_seen_tokens + seq_len, device=input_ids.device - ).unsqueeze(0).expand(batch_size, -1) + ) + #.unsqueeze(0).expand(batch_size, -1) if position_ids is None: position_ids = cache_position.unsqueeze(0) + print(f"input_embeds: {input_embeds.shape}") hidden_states = input_embeds # Reshape hidden_states to (batch_size, seq_len, num_heads, head_dim) @@ -250,14 +262,12 @@ def forward( print(f"position_ids: {position_ids.shape}") # Apply rotary positional embeddings - position_embeddings = self.rotary_pos_emb( - hidden_states, - input_pos=position_ids - ) + position_embeddings = self.rotary_pos_emb(hidden_states, position_ids) - print(f"position_embeddings: {position_embeddings.shape}") + print(f"position_embeddings: {position_embeddings}") # Reshape back to (batch_size, seq_len, hidden_size) + print(f"hidden_size: {self.hidden_size}") hidden_states = hidden_states.view(batch_size, seq_len, self.hidden_size) print(f"hidden_states: {hidden_states.shape}") @@ -278,13 +288,12 @@ def forward( print(f"causal_mask: {causal_mask.shape}") # Forward pass through layers with KVCache - for layer_idx in range(self.shard.end_layer, self.shard.start_layer): + for layer_idx in range(self.shard.start_layer, self.shard.end_layer): print(f"forward layer #{layer_idx}") encoder_layer = self.layers[layer_idx] print(f"encoder_layer\n{encoder_layer}") layer_hidden_state, layer_kv_cache = self.layers[layer_idx]( hidden_states=hidden_states, - kv_cache=past_kv_cache, attention_mask=causal_mask, position_ids=position_ids, position_embeddings=position_embeddings @@ -293,6 +302,8 @@ def forward( hidden_states = layer_hidden_state past_kv_cache = layer_kv_cache + print(f"layer_kv_cache: {layer_kv_cache.size}") + # Apply final layer normalization hidden_states = self.norm(hidden_states) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 10eabc740..1f5abbe5c 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -3,10 +3,13 @@ """ import json from pathlib import Path +from typing import Optional, Tuple import torch import torch.nn as nn import torch.nn.functional as F +import torchtune.modules as ttm +import math from exo.helpers import DEBUG @@ -90,7 +93,7 @@ def select_next_token( return next_token.squeeze(-1) -class MLP(nn.Module): +class MultiLayerPreceptron(nn.Module): def __init__(self, input_dim, hidden_dims, output_dim, activation='gelu', dropout=0.0, use_batchnorm=False): """ General MLP (Multi-Layer Perceptron) module. @@ -103,7 +106,7 @@ def __init__(self, input_dim, hidden_dims, output_dim, activation='gelu', dropou dropout (float): Dropout probability. use_batchnorm (bool): Whether to use batch normalization. 
""" - super(MLP, self).__init__() + super(MultiLayerPreceptron, self).__init__() self.layers = nn.ModuleList() self.use_batchnorm = use_batchnorm @@ -152,70 +155,238 @@ def forward(self, x): return self.output_layer(x) def create_4d_causal_attention_mask( - attention_mask: torch.Tensor, - seq_len: int, - target_len: int, - dtype: torch.dtype, - device: torch.device, - cache_pos: torch.LongTensor, - batch_size: int, + attention_mask: torch.Tensor, + seq_len: int, + target_len: int, + dtype: torch.dtype, + device: torch.device, + cache_pos: torch.Tensor, + batch_size: int, ) -> torch.Tensor: + """ + Creates a 4D causal attention mask from a 2D mask, with adjustments for static caching. + + Args: + attention_mask (torch.Tensor): + A 2D tensor of shape (batch_size, key_value_length) or a 4D tensor of shape + (batch_size, 1, query_length, key_value_length). + seq_len (int): + Sequence length of the input being processed. + target_len (int): + Target length to generate the causal mask. + dtype (torch.dtype): + Data type for the causal mask. + device (torch.device): + Device to place the causal mask on. + cache_pos (torch.Tensor): + Cache position indices indicating the position of the input tokens in the sequence. + batch_size (int): + Number of samples in the batch. + + Returns: + torch.Tensor: + A 4D causal mask of shape (batch_size, 1, query_length, key_value_length). + """ + if attention_mask is not None and attention_mask.dim() == 4: + # If the mask is already 4D, return it directly + return attention_mask + + min_value = torch.finfo(dtype).min + + # Create a 2D causal mask of shape (seq_len, target_len) + causal_mask = torch.full( + (seq_len, target_len), fill_value=min_value, dtype=dtype, device=device + ) + + if seq_len != 1: + # Mask positions after the current position + causal_mask = torch.triu(causal_mask, diagonal=1) + + # Adjust causal mask for cache position + causal_mask *= (torch.arange(target_len, device=device) > cache_pos.view(-1, 1)) + + # Expand to 4D and batch size + causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1) + + if attention_mask is not None: + # Create a padding mask based on the input attention_mask + mask_len = attention_mask.shape[-1] + causal_mask = causal_mask.clone() # Ensure contiguous memory for in-place operations + padding_mask = causal_mask[:, :, :, :mask_len] + attention_mask[:, None, None, :] + padding_mask = padding_mask == 0 + + # Apply padding to the causal mask + causal_mask[:, :, :, :mask_len] = causal_mask[:, :, :, :mask_len].masked_fill( + padding_mask, min_value + ) + + return causal_mask + +def rotate_half(x): + """Rotates half the hidden dims of the input.""" + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + +class MultiHeadAttention(nn.Module): + """Multi-headed attention mechanism.""" + + def __init__( + self, + hidden_size, + num_heads, + num_kv_heads, + head_dim, + rotary_emb, + kv_cache: Optional[ttm.KVCache] = None, + attention_dropout=0.0, + is_causal=True + ): + super().__init__() + self.hidden_size = hidden_size + self.num_heads = num_heads + self.num_kv_heads = num_kv_heads + self.head_dim = head_dim + self.attention_dropout = attention_dropout + self.is_causal = is_causal + self.rotary_emb = rotary_emb + + self.q_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=False) + self.k_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=False) + self.v_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=False) + self.o_proj = 
nn.Linear(num_heads * head_dim, hidden_size, bias=False) + + self.kv_cache = kv_cache + + def forward( + self, + hidden_states: torch.Tensor, + position_ids: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None + ) -> torch.Tensor: + batch_size, seq_len, _ = hidden_states.size() + + if self.kv_cache is None or self.kv_cache.batch_size != batch_size: + self.kv_cache = ttm.KVCache( + batch_size=batch_size, + max_seq_len=seq_len, + num_heads=self.num_kv_heads, + head_dim=self.head_dim, + dtype=hidden_states.dtype + ) + + # Project to queries, keys, and values + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + # Reshape to [batch_size, num_heads, seq_len, head_dim] + query_states = query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) + key_states = key_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) + value_states = value_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) + print(f"query_states: {query_states.shape}") + print(f"key_states: {key_states.shape}") + print(f"value_states: {value_states.shape}") + + # Apply rotary positional embeddings if position_ids are provided + # or use position_embeddings + if position_embeddings is not None: + cos, sin = position_embeddings + else: + cos, sin = self.rotary_emb(query_states, position_ids) + + # Expand cos and sin to match the shape of query_states + cos = cos[:, :, None, :self.head_dim].expand_as(query_states) + sin = sin[:, :, None, :self.head_dim].expand_as(query_states) + print(f"cos: {cos.shape} | sin: {sin.shape}") + + # Apply rotary embeddings to queries and keys + query_states = (query_states * cos) + (rotate_half(query_states) * sin) + key_states = (key_states * cos) + (rotate_half(key_states) * sin) + + # Repeat keys and values if needed + if self.num_heads > self.num_kv_heads: + n_rep = self.num_heads // self.num_kv_heads + key_states = torch.repeat_interleave(key_states, n_rep, dim=1) + value_states = torch.repeat_interleave(value_states, n_rep, dim=1) + + print(f"query_states: {query_states.shape}") + print(f"key_states: {key_states.shape}") + print(f"value_states: {value_states.shape}") + + # Forcing caching always enabled + key_states, value_states = self.kv_cache.update(key_states, value_states) + + # Compute attention scores + attn_weights = torch.matmul(query_states, key_states.transpose(-2, -1)) / math.sqrt(self.head_dim) + + # Apply causal mask, if applicable + if self.is_causal: + causal_mask = torch.tril(torch.ones((seq_len, seq_len), device=hidden_states.device)) + attn_weights = attn_weights.masked_fill(causal_mask == 0, float('-inf')) + + # Apply attention mask, if provided + if attention_mask is not None: + attn_weights = attn_weights + attention_mask + + # Softmax normalization + attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) + attn_weights = F.dropout(attn_weights, p=self.attention_dropout, training=self.training) + + # Compute attention output + attn_output = torch.matmul(attn_weights, value_states) + + # Reshape to [batch_size, seq_len, hidden_size] + attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, -1) + + # Project back to hidden size + attn_output = self.o_proj(attn_output) + + return attn_output + +class RotaryEmbedding(nn.Module): + """Rotary Position Embedding.""" + + def 
__init__(self, dim, max_position_embeddings=2048, base=10000, scaling_factor=1.0, rope_type="default", device=None): + super().__init__() + self.dim = dim + self.max_position_embeddings = max_position_embeddings + self.base = base + self.scaling_factor = scaling_factor + self.rope_type = rope_type + + # Initialize the inverse frequency for RoPE + inv_freq = 1.0 / (self.base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim)) + self.register_buffer("inv_freq", inv_freq, persistent=False) + + def forward(self, x, position_ids) -> Tuple[torch.Tensor, torch.Tensor]: """ - Creates a 4D causal attention mask from a 2D mask, with adjustments for static caching. + Compute the rotary position embeddings (cos, sin) for the given input tensor. Args: - attention_mask (torch.Tensor): - A 2D tensor of shape (batch_size, key_value_length) or a 4D tensor of shape - (batch_size, 1, query_length, key_value_length). - seq_len (int): - Sequence length of the input being processed. - target_len (int): - Target length to generate the causal mask. - dtype (torch.dtype): - Data type for the causal mask. - device (torch.device): - Device to place the causal mask on. - cache_pos (torch.Tensor): - Cache position indices indicating the position of the input tokens in the sequence. - batch_size (int): - Number of samples in the batch. + x (torch.Tensor): The input tensor of shape (batch_size, seq_len, num_heads, head_dim). + position_ids (torch.Tensor): The position indices for the sequence. Returns: - torch.Tensor: - A 4D causal mask of shape (batch_size, 1, query_length, key_value_length). + Tuple[torch.Tensor, torch.Tensor]: The cos and sin embeddings. """ - if attention_mask is not None and attention_mask.dim() == 4: - # If the mask is already 4D, return it directly - return attention_mask + # Expand inv_freq to match the batch size and sequence length + batch_size, seq_len = position_ids.size(0), position_ids.size(1) + inv_freq_expanded = self.inv_freq[None, :, None].expand(batch_size, -1, seq_len) - min_value = torch.finfo(dtype).min + # Expand position_ids to match the frequency tensor + position_ids_expanded = position_ids[:, None, :].float() - # Create a 2D causal mask of shape (seq_len, target_len) - causal_mask = torch.full( - (seq_len, target_len), fill_value=min_value, dtype=dtype, device=device - ) + # Compute cos and sin embeddings + freqs = torch.einsum("bnd,bnl->bnd", inv_freq_expanded, position_ids_expanded) + emb = torch.cat((freqs, freqs), dim=-1) + cos = emb.cos() + sin = emb.sin() - if seq_len != 1: - # Mask positions after the current position - causal_mask = torch.triu(causal_mask, diagonal=1) + # Apply the scaling factor to cos and sin embeddings + cos = cos * self.scaling_factor + sin = sin * self.scaling_factor - # Adjust causal mask for cache position - causal_mask *= (torch.arange(target_len, device=device) > cache_pos.view(-1, 1)) - - # Expand to 4D and batch size - causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1) - - if attention_mask is not None: - # Create a padding mask based on the input attention_mask - mask_len = attention_mask.shape[-1] - causal_mask = causal_mask.clone() # Ensure contiguous memory for in-place operations - padding_mask = causal_mask[:, :, :, :mask_len] + attention_mask[:, None, None, :] - padding_mask = padding_mask == 0 - - # Apply padding to the causal mask - causal_mask[:, :, :, :mask_len] = causal_mask[:, :, :, :mask_len].masked_fill( - padding_mask, min_value - ) - - return causal_mask + return cos.to(dtype=x.dtype), 
sin.to(dtype=x.dtype) diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index 6537115bf..973b66da4 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -40,13 +40,16 @@ def test_generation(model, tokenizer, text, max_length=10): print(f"attention_mask: {attention_mask}") # Initialize KVCache for caching - past_kv_cache = KVCache( - batch_size=input_ids.size(0), - max_seq_len=model.max_position_embeddings, - num_heads=model.num_heads, - head_dim=model.head_dim, - dtype=input_ids.dtype - ) + past_kv_cache = None + #past_kv_cache = KVCache( + # batch_size=input_ids.size(0), + # max_seq_len=model.max_position_embeddings, + # num_heads=model.num_heads, + # head_dim=model.head_dim, + # dtype=input_ids.dtype + #) + + #print(f"past_kv_cache: {past_kv_cache}") # Start with initial input_ids generated_ids = input_ids.clone() From ea868c6b7b4155bc88fc589449cefab4085f1a8f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 30 Oct 2024 03:20:09 -0800 Subject: [PATCH 468/589] updating attentions, changed model struct, fixing kv cache --- exo/inference/torch/models/llama3.py | 157 +++---- exo/inference/torch/models/llm_utils.py | 413 +++++++++++++----- .../torch/tests/test_llama3_model.py | 63 +-- 3 files changed, 393 insertions(+), 240 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index bf2e6f16e..09c162e6e 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -9,13 +9,14 @@ import torch.nn as nn from torchtune.modules import ( KVCache, - RMSNorm, + RMSNorm ) from exo.inference.shard import Shard from exo.inference.torch.models.llm_utils import ( MultiLayerPreceptron, - MultiHeadAttention, + #MultiHeadAttention, + SDPAttention, RotaryEmbedding, create_4d_causal_attention_mask ) @@ -27,36 +28,14 @@ class LlamaBlock(nn.Module): def __init__( self, dim, - head_dim, - num_heads, - num_kv_heads, - ff_dim, - rotary_pos_emb, mlp, - rms_norm_eps=1e-6, - attention_dropout=0.0, - use_bias=False, - max_seq_len=4096, - pos_embeddings=None + self_attn, + rms_norm_eps=1e-6 ): super(LlamaBlock, self).__init__() - self.dim = dim - self.head_dim = head_dim - self.num_heads = num_heads - self.num_kv_heads = num_kv_heads - self.ff_dim = ff_dim - self.rms_norm_eps = rms_norm_eps - self.attention_dropout = attention_dropout - self.use_bias = use_bias - self.max_seq_len = max_seq_len - self.pos_embeddings = pos_embeddings - self.rotary_pos_emb = rotary_pos_emb - self.q_proj = nn.Linear(dim, num_heads * head_dim, bias=use_bias) - self.k_proj = nn.Linear(dim, num_kv_heads * head_dim, bias=use_bias) - self.v_proj = nn.Linear(dim, num_kv_heads * head_dim, bias=use_bias) - self.output_proj = nn.Linear(num_heads * head_dim, dim, bias=use_bias) - self.input_layer_norm = RMSNorm(dim, eps=rms_norm_eps) + self.self_attn = self_attn self.mlp = mlp + self.input_layer_norm = RMSNorm(dim, eps=rms_norm_eps) self.post_attention_norm = RMSNorm(dim, eps=rms_norm_eps) def forward( @@ -88,23 +67,12 @@ def forward( hidden_states = self.input_layer_norm(hidden_states) print(f"self.input_layer_norm(hidden_states) {hidden_states.shape}") - batch_size, seq_len, _ = hidden_states.shape - hidden_states = hidden_states.view(batch_size, seq_len, self.num_heads, self.head_dim).squeeze() - print(f"hidden_states: {hidden_states.shape}") - - # Apply MultiHeadAttention with KVCache - mh_attn = MultiHeadAttention( - 
hidden_size=self.head_dim, - num_heads=self.num_heads, - num_kv_heads=self.num_kv_heads, - head_dim=self.head_dim, - kv_cache=kv_cache, - is_causal=True, - attention_dropout=self.attention_dropout, - rotary_emb=self.rotary_pos_emb - ) + #batch_size, seq_len, _ = hidden_states.shape + #hidden_states = hidden_states.view(batch_size, seq_len, self.num_heads, self.head_dim).squeeze() + #print(f"hidden_states: {hidden_states.shape}") - hidden_states = mh_attn( + # Apply MultiHeadAttention with KVCache + hidden_states, kv_cache = self.self_attn( hidden_states=hidden_states, position_ids=position_ids, attention_mask=attention_mask, @@ -114,8 +82,8 @@ def forward( # Residual connection hidden_states = residual + hidden_states residual = hidden_states - print(f"hidden_states: {hidden_states}") - print(f"residual: {residual}") + print(f"hidden_states: {hidden_states.shape}") + print(f"residual: {residual.shape}") # Post attention normalization hidden_states = self.post_attention_norm(hidden_states) # Feed-forward network with MLP and residual connection @@ -163,41 +131,42 @@ def __init__(self, config: dict, shard: Shard): self.attention_dropout = config.get('attention_dropout', 0.0) self.padding_idx = config.get("pad_token_id") - # Model layers and methods + # Model layers and methods, order matters self.embed = nn.Embedding(self.vocab_size, self.hidden_size, self.padding_idx) - self.rotary_pos_emb = RotaryEmbedding( - self.head_dim, - config['rope_scaling']['original_max_position_embeddings'], - config['rope_theta'] - ) - self.mlp = MultiLayerPreceptron( - input_dim=self.hidden_size, - hidden_dims=[self.intermediate_size], # Single hidden layer with ff_dim as the hidden size - output_dim=self.hidden_size, - activation='gelu', - dropout=self.attention_dropout - ) self.layers = nn.ModuleList([ LlamaBlock( dim=self.hidden_size, - head_dim=self.hidden_size // self.num_heads, - num_heads=self.num_heads, - num_kv_heads=self.num_kv_heads, - ff_dim=self.intermediate_size, rms_norm_eps=self.rms_norm_eps, - attention_dropout=self.attention_dropout, - use_bias=config.get('attention_bias', False), - rotary_pos_emb=self.rotary_pos_emb, - mlp=self.mlp + self_attn=SDPAttention( + hidden_size=self.hidden_size, + num_heads=self.num_heads, + num_kv_heads=self.num_kv_heads, + head_dim=self.hidden_size // self.num_heads, + is_causal=True, + attention_dropout=self.attention_dropout, + rotary_emb=RotaryEmbedding( + self.head_dim + ), + attention_bias=config.get('attention_bias', False) + ), + mlp=MultiLayerPreceptron( + input_dim=self.hidden_size, + hidden_dim=self.intermediate_size, + activation=self.config.get("hidden_act", "silu"), + use_bias=self.config.get("mlp_bias", False) + ), ) for _ in range(self.num_layers) ]) self.norm = RMSNorm(self.hidden_size, eps=self.rms_norm_eps) + self.rotary_pos_emb = RotaryEmbedding( + self.head_dim + ) self.lm_head = nn.Linear(self.hidden_size, self.vocab_size, bias=False) def forward( self, input_ids: torch.Tensor, - attention_mask: Optional[torch.Tensor] = None, + attention_mask: torch.Tensor, position_ids: Optional[torch.Tensor] = None, cache_position: Optional[torch.Tensor] = None, past_kv_cache: Optional[KVCache] = None, @@ -215,7 +184,7 @@ def forward( Returns: Tuple[torch.Tensor, Optional[Tuple[torch.Tensor]], KVCache]: - - logits (Optional[torch.Tensor]): Output logits of shape (batch_size, seq_len, vocab_size). + - pred_score (Optional[torch.Tensor]): Prediction scores from lm_head of model. 
- hidden_states (Optional[torch.Tensor]): Hidden states from each layer - past_kv_cache (KVCache): Updated KVCache object. """ @@ -239,11 +208,13 @@ def forward( past_seen_tokens = past_kv_cache.size if past_kv_cache is not None else 0 cache_position = torch.arange( past_seen_tokens, - past_seen_tokens + seq_len, + past_seen_tokens + input_embeds.shape[1], device=input_ids.device ) #.unsqueeze(0).expand(batch_size, -1) + print(f"cache_position: {cache_position.shape}") + if position_ids is None: position_ids = cache_position.unsqueeze(0) @@ -264,28 +235,30 @@ def forward( # Apply rotary positional embeddings position_embeddings = self.rotary_pos_emb(hidden_states, position_ids) - print(f"position_embeddings: {position_embeddings}") - # Reshape back to (batch_size, seq_len, hidden_size) print(f"hidden_size: {self.hidden_size}") hidden_states = hidden_states.view(batch_size, seq_len, self.hidden_size) print(f"hidden_states: {hidden_states.shape}") - # create 4d causal mask - causal_mask = None - if attention_mask is not None: - causal_mask = create_4d_causal_attention_mask( - attention_mask=attention_mask, - seq_len=hidden_states.shape[1], - target_len=attention_mask.shape[-1], - dtype=hidden_states.dtype, - device=hidden_states.device, - cache_pos=cache_position, - batch_size=hidden_states.size(0) - ) + # create/update 4d causal mask + seq_len = input_embeds.shape[1] + + if past_kv_cache is not None: + target_len = past_kv_cache.size + seq_len + 1 + else: + target_len = seq_len + 1 + causal_mask = create_4d_causal_attention_mask( + attention_mask=attention_mask, + seq_len=seq_len, + target_len=target_len, + dtype=input_embeds.dtype, + device=input_embeds.device, + cache_pos=cache_position, + batch_size=input_embeds.size(0) + ) - print(f"attention_mask: {attention_mask.shape}") - print(f"causal_mask: {causal_mask.shape}") + print(f"attention_mask: {attention_mask.shape}") + print(f"causal_mask: {causal_mask.shape}") # Forward pass through layers with KVCache for layer_idx in range(self.shard.start_layer, self.shard.end_layer): @@ -307,13 +280,13 @@ def forward( # Apply final layer normalization hidden_states = self.norm(hidden_states) - # Compute logits if at end layer + # Compute prediction score from lm head if at end layer if self.shard.is_last_layer(): - logits = self.lm_head(hidden_states[:, -1:, :]) + pred_score = self.lm_head(hidden_states) else: - logits = None + pred_score = None - if logits is None: - return logits, hidden_states, past_kv_cache + if pred_score is None: + return pred_score, hidden_states, past_kv_cache else: - return logits, None, past_kv_cache + return pred_score, None, past_kv_cache diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 1f5abbe5c..f43c228b0 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -11,6 +11,8 @@ import torchtune.modules as ttm import math +from transformers.models.mamba.modeling_mamba import causal_conv1d_update + from exo.helpers import DEBUG def load_model_config(model_config_path: Path) -> dict: @@ -39,7 +41,7 @@ def select_next_token( Selects the next token from logits using top-k, top-p, and temperature scaling. Args: - logits (torch.Tensor): Logits tensor of shape (batch_size, vocab_size). + logits (torch.Tensor): Logits or prediction scores tensor of shape (batch_size, vocab_size). top_k (int): Number of top logits to consider for sampling. top_p (float): Cumulative probability threshold for nucleus sampling. 
temperature (float): Scaling factor for temperature. @@ -58,49 +60,50 @@ def select_next_token( # Apply top-k filtering if top_k > 0: - # Get the top-k logits and set the rest to -inf - top_k_values, _ = torch.topk(logits, top_k, dim=-1) - min_top_k_value = top_k_values[:, -1, None] - logits = torch.where(logits < min_top_k_value, torch.tensor(float('-inf'), device=logits.device), logits) + top_k = min(top_k, logits.size(-1)) + min_topk = torch.topk(logits, top_k)[0][..., -1, None] + logits = logits.masked_fill(logits < min_topk, float("-inf")) # Apply top-p (nucleus) filtering if top_p > 0.0: - sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1) - cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) - - # Mask tokens exceeding the top-p threshold - sorted_indices_to_remove = cumulative_probs > top_p - sorted_indices_to_remove[:, 1:] = sorted_indices_to_remove[:, :-1].clone() # Shift right - sorted_indices_to_remove[:, 0] = 0 # Ensure at least one token is selected + sorted_logits, sorted_indices = torch.sort(logits, descending=False) + cumulative_probs = sorted_logits.softmax(dim=-1).cumsum(dim=-1) + sorted_indices_to_remove = cumulative_probs <= (1 - top_p) + sorted_indices_to_remove[..., -1:] = 0 indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove) logits = logits.masked_fill(indices_to_remove, float('-inf')) - # Calculate probabilities - probs = F.softmax(logits, dim=-1) - # Select next token if not use_max: + probs = F.softmax(logits, dim=-1) next_token = torch.multinomial(probs, num_samples=1) else: next_token = torch.argmax(logits, dim=-1, keepdim=True) + next_token = next_token[:, None].squeeze(-1) + # Debugging output if DEBUG >= 4: print(f"Logits: {logits}") - print(f"Probabilities: {probs}") print(f"Next token: {next_token}") - return next_token.squeeze(-1) + return next_token class MultiLayerPreceptron(nn.Module): - def __init__(self, input_dim, hidden_dims, output_dim, activation='gelu', dropout=0.0, use_batchnorm=False): + def __init__( + self, + input_dim, + hidden_dim, + activation='gelu', + use_bias=False + ): """ General MLP (Multi-Layer Perceptron) module. Args: input_dim (int): Dimensionality of the input. - hidden_dims (list of int): List of hidden layer dimensions. + hidden_dims (int): Hidden layer/intermediate dimensions. output_dim (int): Dimensionality of the output. activation (str): Activation function ('relu', 'gelu', 'tanh', 'sigmoid', etc.). dropout (float): Dropout probability. @@ -108,39 +111,27 @@ def __init__(self, input_dim, hidden_dims, output_dim, activation='gelu', dropou """ super(MultiLayerPreceptron, self).__init__() - self.layers = nn.ModuleList() - self.use_batchnorm = use_batchnorm - # Activation function mapping activations = { 'relu': nn.ReLU(), 'gelu': nn.GELU(), 'tanh': nn.Tanh(), 'sigmoid': nn.Sigmoid(), - 'leaky_relu': nn.LeakyReLU(0.2) + 'leaky_relu': nn.LeakyReLU(0.2), + 'silu': nn.SiLU() } # Ensure valid activation if activation not in activations: raise ValueError(f"Invalid activation: {activation}. 
Choose from {list(activations.keys())}") - self.activation = activations[activation] - # Construct MLP layers - prev_dim = input_dim - for h_dim in hidden_dims: - self.layers.append(nn.Linear(prev_dim, h_dim)) - if use_batchnorm: - self.layers.append(nn.BatchNorm1d(h_dim)) - self.layers.append(self.activation) - if dropout > 0: - self.layers.append(nn.Dropout(dropout)) - prev_dim = h_dim - - # Output layer - self.output_layer = nn.Linear(prev_dim, output_dim) - - def forward(self, x): + self.gate_proj = nn.Linear(input_dim, hidden_dim, bias=use_bias) + self.up_proj = nn.Linear(input_dim, hidden_dim, bias=use_bias) + self.down_proj = nn.Linear(hidden_dim, input_dim, bias=use_bias) + self.act_fn = activations[activation] + + def forward(self, x) -> torch.Tensor: """ Forward pass for the MLP module. @@ -150,9 +141,8 @@ def forward(self, x): Returns: torch.Tensor: Output tensor after the MLP transformations. """ - for layer in self.layers: - x = layer(x) - return self.output_layer(x) + down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x)) + return down_proj def create_4d_causal_attention_mask( attention_mask: torch.Tensor, @@ -228,8 +218,69 @@ def rotate_half(x): x2 = x[..., x.shape[-1] // 2 :] return torch.cat((-x2, x1), dim=-1) +class RotaryEmbedding(nn.Module): + """ + Rotary Position Embedding. + + This computes the inverse frequencies according to the original RoPE implementation. + There are other implementations that will be added. + Ref: https://github.com/huggingface/transformers/blob/main/src/transformers/modeling_rope_utils.py + """ + + def __init__(self, dim, max_position_embeddings=2048, base=10000, scaling_factor=1.0): + super().__init__() + self.dim = dim + self.max_position_embeddings = max_position_embeddings + self.base = base + self.scaling_factor = scaling_factor + + # Initialize the inverse frequency for RoPE + inv_freq = 1.0 / (self.base ** (torch.arange(0, dim, 2, dtype=torch.int64).float() / dim)) + self.register_buffer("inv_freq", inv_freq, persistent=False) + + @torch.no_grad() + def forward(self, x, position_ids) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Compute the rotary position embeddings (cos, sin) for the given input tensor. + + Args: + x (torch.Tensor): The input tensor of shape (batch_size, seq_len, num_heads, head_dim). + position_ids (torch.Tensor): The position indices for the sequence. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: The cos and sin embeddings. + """ + # Expand inv_freq to match the batch size + inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.size(0), -1, 1) + position_ids_expanded = position_ids[:, None, :].float() + + # Compute cos and sin embeddings + device_type = x.device.type + device_type = device_type if isinstance(device_type, str) and device_type != "mps" else "cpu" + with torch.autocast(device_type=device_type, enabled=False): + freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) + emb = torch.cat((freqs, freqs), dim=-1) + cos = emb.cos() + sin = emb.sin() + + # Apply the scaling factor to cos and sin embeddings + cos = cos * self.scaling_factor + sin = sin * self.scaling_factor + + return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) + +# ------------------ +# Attention Methods +# ------------------ + class MultiHeadAttention(nn.Module): - """Multi-headed attention mechanism.""" + """ + Multi-headed attention mechanism. + + Using the "attention is all you need" implementation. Other implementations will follow. 
+ Ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L277 + Ref: https://pytorch.org/torchtune/0.3/_modules/torchtune/modules/attention.html + """ def __init__( self, @@ -240,7 +291,8 @@ def __init__( rotary_emb, kv_cache: Optional[ttm.KVCache] = None, attention_dropout=0.0, - is_causal=True + is_causal=True, + attention_bias=False ): super().__init__() self.hidden_size = hidden_size @@ -249,42 +301,37 @@ def __init__( self.head_dim = head_dim self.attention_dropout = attention_dropout self.is_causal = is_causal - self.rotary_emb = rotary_emb - - self.q_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=False) - self.k_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=False) - self.v_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=False) - self.o_proj = nn.Linear(num_heads * head_dim, hidden_size, bias=False) - self.kv_cache = kv_cache + # nn layers + self.q_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=attention_bias) + self.k_proj = nn.Linear(hidden_size, num_kv_heads * head_dim, bias=attention_bias) + self.v_proj = nn.Linear(hidden_size, num_kv_heads * head_dim, bias=attention_bias) + self.o_proj = nn.Linear(num_heads * head_dim, hidden_size, bias=attention_bias) + self.rotary_emb = rotary_emb + def forward( self, hidden_states: torch.Tensor, position_ids: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, - position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - ) -> torch.Tensor: + position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + cos_sin_unsqueeze: int=1 + ) -> Tuple[torch.Tensor, ttm.KVCache]: batch_size, seq_len, _ = hidden_states.size() - if self.kv_cache is None or self.kv_cache.batch_size != batch_size: - self.kv_cache = ttm.KVCache( - batch_size=batch_size, - max_seq_len=seq_len, - num_heads=self.num_kv_heads, - head_dim=self.head_dim, - dtype=hidden_states.dtype - ) - # Project to queries, keys, and values query_states = self.q_proj(hidden_states) key_states = self.k_proj(hidden_states) value_states = self.v_proj(hidden_states) + print(f"query_states: {query_states.shape}") + print(f"key_states: {key_states.shape}") + print(f"value_states: {value_states.shape}") # Reshape to [batch_size, num_heads, seq_len, head_dim] query_states = query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) - key_states = key_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) - value_states = value_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) + key_states = key_states.view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) + value_states = value_states.view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) print(f"query_states: {query_states.shape}") print(f"key_states: {key_states.shape}") print(f"value_states: {value_states.shape}") @@ -294,99 +341,227 @@ def forward( if position_embeddings is not None: cos, sin = position_embeddings else: - cos, sin = self.rotary_emb(query_states, position_ids) + cos, sin = self.rotary_emb(value_states, position_ids) - # Expand cos and sin to match the shape of query_states - cos = cos[:, :, None, :self.head_dim].expand_as(query_states) - sin = sin[:, :, None, :self.head_dim].expand_as(query_states) + print(f"cos: {cos.shape} | sin: {sin.shape}") + # Expand cos and sin to match hidden_states' shape + cos = cos.unsqueeze(cos_sin_unsqueeze) + sin = sin.unsqueeze(cos_sin_unsqueeze) 
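+    # cos/sin now broadcast over the head dimension; (x * cos) + (rotate_half(x) * sin) below applies the pairwise rotary rotation to queries and keys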
print(f"cos: {cos.shape} | sin: {sin.shape}") # Apply rotary embeddings to queries and keys query_states = (query_states * cos) + (rotate_half(query_states) * sin) key_states = (key_states * cos) + (rotate_half(key_states) * sin) - - # Repeat keys and values if needed - if self.num_heads > self.num_kv_heads: - n_rep = self.num_heads // self.num_kv_heads - key_states = torch.repeat_interleave(key_states, n_rep, dim=1) - value_states = torch.repeat_interleave(value_states, n_rep, dim=1) - print(f"query_states: {query_states.shape}") print(f"key_states: {key_states.shape}") print(f"value_states: {value_states.shape}") # Forcing caching always enabled + if self.kv_cache is not None: + print(f"self.kv_cache.size {self.kv_cache.size}") + print(f"key_states.size(0) {key_states.size(2)}") + if self.kv_cache is None or self.kv_cache.batch_size != key_states.size(0): + print(f"\n MAKE NEW KVCACHE batch_size={key_states.size(0)} max_seq_len={key_states.size(2)}") + self.kv_cache = ttm.KVCache( + batch_size=key_states.size(0), + max_seq_len=key_states.size(2), + num_heads=self.num_kv_heads, + head_dim=self.head_dim, + dtype=hidden_states.dtype + ) key_states, value_states = self.kv_cache.update(key_states, value_states) + print(f"kv_cache: {self.kv_cache.size}") + print(f"key_states: {key_states.shape}") + print(f"value_states: {value_states.shape}") - # Compute attention scores - attn_weights = torch.matmul(query_states, key_states.transpose(-2, -1)) / math.sqrt(self.head_dim) + # Repeat keys and values if needed + #if self.num_heads > self.num_kv_heads: + n_rep = self.num_heads // self.num_kv_heads + key_states = torch.repeat_interleave(key_states, n_rep, dim=1) + value_states = torch.repeat_interleave(value_states, n_rep, dim=1) + + print(f"query_states: {query_states.shape}") + print(f"key_states: {key_states.shape}") + print(f"value_states: {value_states.shape}") - # Apply causal mask, if applicable - if self.is_causal: - causal_mask = torch.tril(torch.ones((seq_len, seq_len), device=hidden_states.device)) - attn_weights = attn_weights.masked_fill(causal_mask == 0, float('-inf')) + # Compute attention scores + attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim) + print(f"attn_weights: {attn_weights.shape}") # Apply attention mask, if provided if attention_mask is not None: - attn_weights = attn_weights + attention_mask + print(f"attention_mask: {attention_mask.shape}") + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] + print(f"causal_mask: {causal_mask.shape}") + attn_weights = attn_weights + causal_mask + print(f"attn_weights: {attn_weights.shape}") # Softmax normalization attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) attn_weights = F.dropout(attn_weights, p=self.attention_dropout, training=self.training) + print(f"attn_weights: {attn_weights.shape}") # Compute attention output attn_output = torch.matmul(attn_weights, value_states) + print(f"attn_output: {attn_output.shape}") - # Reshape to [batch_size, seq_len, hidden_size] - attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, -1) + # Transpose attention output + attn_output = attn_output.transpose(1,2).contiguous() + print(f"attn_output: {attn_output.shape}") + + # Reshape [batch_size, seq_len, -1] + attn_output = attn_output.reshape(batch_size, seq_len, -1) + print(f"attn_output after transpose: {attn_output.shape}") # Project back to hidden size attn_output = self.o_proj(attn_output) + print(f"attn_output: 
{attn_output.shape}") - return attn_output + return attn_output, self.kv_cache -class RotaryEmbedding(nn.Module): - """Rotary Position Embedding.""" +class SDPAttention(nn.Module): + """ + Scaled dot product attention mechanism. - def __init__(self, dim, max_position_embeddings=2048, base=10000, scaling_factor=1.0, rope_type="default", device=None): + Using the scaled dot product attention method from pytorch + Ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L524 + """ + + def __init__( + self, + hidden_size, + num_heads, + num_kv_heads, + head_dim, + rotary_emb, + kv_cache: Optional[ttm.KVCache] = None, + attention_dropout=0.0, + is_causal=True, + attention_bias=False + ): super().__init__() - self.dim = dim - self.max_position_embeddings = max_position_embeddings - self.base = base - self.scaling_factor = scaling_factor - self.rope_type = rope_type + self.hidden_size = hidden_size + self.num_heads = num_heads + self.num_kv_heads = num_kv_heads + self.head_dim = head_dim + self.attention_dropout = attention_dropout + self.is_causal = is_causal + self.kv_cache = kv_cache - # Initialize the inverse frequency for RoPE - inv_freq = 1.0 / (self.base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim)) - self.register_buffer("inv_freq", inv_freq, persistent=False) + # nn layers + self.q_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=attention_bias) + self.k_proj = nn.Linear(hidden_size, num_kv_heads * head_dim, bias=attention_bias) + self.v_proj = nn.Linear(hidden_size, num_kv_heads * head_dim, bias=attention_bias) + self.o_proj = nn.Linear(num_heads * head_dim, hidden_size, bias=attention_bias) + self.rotary_emb = rotary_emb - def forward(self, x, position_ids) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Compute the rotary position embeddings (cos, sin) for the given input tensor. + def forward( + self, + hidden_states: torch.Tensor, + position_ids: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + cos_sin_unsqueeze: int=1 + ) -> Tuple[torch.Tensor, ttm.KVCache]: + batch_size, seq_len, _ = hidden_states.size() - Args: - x (torch.Tensor): The input tensor of shape (batch_size, seq_len, num_heads, head_dim). - position_ids (torch.Tensor): The position indices for the sequence. + # Project to queries, keys, and values + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + print(f"query_states: {query_states.shape}") + print(f"key_states: {key_states.shape}") + print(f"value_states: {value_states.shape}") - Returns: - Tuple[torch.Tensor, torch.Tensor]: The cos and sin embeddings. 
- """ - # Expand inv_freq to match the batch size and sequence length - batch_size, seq_len = position_ids.size(0), position_ids.size(1) - inv_freq_expanded = self.inv_freq[None, :, None].expand(batch_size, -1, seq_len) + # Reshape to [batch_size, num_heads, seq_len, head_dim] + query_states = query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) + key_states = key_states.view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) + value_states = value_states.view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) + print(f"query_states: {query_states.shape}") + print(f"key_states: {key_states.shape}") + print(f"value_states: {value_states.shape}") - # Expand position_ids to match the frequency tensor - position_ids_expanded = position_ids[:, None, :].float() + # Apply rotary positional embeddings if position_ids are provided + # or use position_embeddings + if position_embeddings is not None: + cos, sin = position_embeddings + else: + cos, sin = self.rotary_emb(value_states, position_ids) - # Compute cos and sin embeddings - freqs = torch.einsum("bnd,bnl->bnd", inv_freq_expanded, position_ids_expanded) - emb = torch.cat((freqs, freqs), dim=-1) - cos = emb.cos() - sin = emb.sin() + print(f"cos: {cos.shape} | sin: {sin.shape}") + # Expand cos and sin to match hidden_states' shape + cos = cos.unsqueeze(cos_sin_unsqueeze) + sin = sin.unsqueeze(cos_sin_unsqueeze) + print(f"cos: {cos.shape} | sin: {sin.shape}") - # Apply the scaling factor to cos and sin embeddings - cos = cos * self.scaling_factor - sin = sin * self.scaling_factor + # Apply rotary embeddings to queries and keys + query_states = (query_states * cos) + (rotate_half(query_states) * sin) + key_states = (key_states * cos) + (rotate_half(key_states) * sin) + print(f"query_states: {query_states.shape}") + print(f"key_states: {key_states.shape}") + print(f"value_states: {value_states.shape}") - return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) + # Forcing caching always enabled + if self.kv_cache is not None: + print(f"self.kv_cache.size {self.kv_cache.size}") + print(f"key_states.size(0) {key_states.size(2)}") + if self.kv_cache is None or self.kv_cache.size != key_states.size(2): + print(f"\n MAKE NEW KVCACHE batch_size={key_states.size(0)} max_seq_len={key_states.size(2)}") + self.kv_cache = ttm.KVCache( + batch_size=key_states.size(0), + max_seq_len=key_states.size(2), + num_heads=self.num_kv_heads, + head_dim=self.head_dim, + dtype=hidden_states.dtype + ) + key_states, value_states = self.kv_cache.update(key_states, value_states) + print(f"kv_cache: {self.kv_cache.size}") + print(f"from kv_cache / key_states: {key_states.shape}") + print(f"from kv_cache / value_states: {value_states.shape}") + + # Repeat keys and values if needed + #if self.num_heads > self.num_kv_heads: + n_rep = self.num_heads // self.num_kv_heads + key_states = torch.repeat_interleave(key_states, n_rep, dim=1) + value_states = torch.repeat_interleave(value_states, n_rep, dim=1) + + print(f"query_states: {query_states.shape}") + print(f"key_states: {key_states.shape}") + print(f"value_states: {value_states.shape}") + + causal_mask = attention_mask + if causal_mask is not None: + causal_mask = causal_mask[:, :, :, : key_states.shape[-2]] + print(f"causal_mask: {causal_mask.shape}") + + if query_states.device.type == "cuda" and causal_mask is not None: + query_states = query_states.contiguous() + key_states = key_states.contiguous() + value_states = value_states.contiguous() + + 
print(f"query_states: {query_states.shape}") + print(f"key_states: {key_states.shape}") + print(f"value_states: {value_states.shape}") + + is_causal = True if causal_mask is None and seq_len > 1 else False + + attn_output = F.scaled_dot_product_attention( + query_states, + key_states, + value_states, + attn_mask=causal_mask, + dropout_p=self.attention_dropout if self.training else 0.0, + is_causal=is_causal, + ) + + print(f"attn_output: {attn_output.shape}") + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.view(batch_size, seq_len, -1) + + attn_output = self.o_proj(attn_output) + + print(f"attn_output: {attn_output.shape}") + + return attn_output, self.kv_cache diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index 973b66da4..d61cf5b40 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -9,27 +9,31 @@ from huggingface_hub import snapshot_download from safetensors.torch import load_file as load_safetensors from exo.inference.torch.models.llm_utils import load_model_config, select_next_token -from exo.inference.torch.models.llama3 import LlamaModel, KVCache +from exo.inference.torch.models.llama3 import LlamaModel from exo.inference.shard import Shard MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct" TEMP=0.7 -TOP_K=35 +TOP_K=25 TOP_P=0.9 -def test_generation(model, tokenizer, text, max_length=10): +def test_generation(model, tokenizer, text, max_length=10, config=None): """ Test the generation capabilities of the LlamaModel with sample text. """ # Tokenize input text prompt = tokenizer.apply_chat_template([ + { + "role": "system", + "content": "You are a helpful assistant." + }, { "role": "user", "content": text } ], tokenize=False, add_generation_prompt=True) - + print(f"prompt: {prompt}") inputs = tokenizer(prompt, return_tensors="pt") @@ -39,47 +43,48 @@ def test_generation(model, tokenizer, text, max_length=10): print(f"input_ids: {input_ids}") print(f"attention_mask: {attention_mask}") - # Initialize KVCache for caching - past_kv_cache = None - #past_kv_cache = KVCache( - # batch_size=input_ids.size(0), - # max_seq_len=model.max_position_embeddings, - # num_heads=model.num_heads, - # head_dim=model.head_dim, - # dtype=input_ids.dtype - #) - - #print(f"past_kv_cache: {past_kv_cache}") - # Start with initial input_ids generated_ids = input_ids.clone() # Generate tokens step-by-step + past_kvs = None + + print(f"{model}") + for _ in range(max_length): with torch.no_grad(): - logits, _, past_kv_cache = model( + pred_score, hstates, past_kvs = model( generated_ids, attention_mask=attention_mask, - past_kv_cache=past_kv_cache + past_kv_cache=past_kvs ) - # Select next token using logits - #next_token = select_next_token(logits, top_k=TOP_K, top_p=TOP_P, temperature=TEMP, use_max=False) - next_token = ttg.sample(logits[:, -1, :].clone().float(), temperature=TEMP, top_k=TOP_K).squeeze(-1) + print(f"pred_score: {pred_score.shape}") + print(f"hstates: {hstates.shape if hstates is not None else None}") + print(f"past_kvs: {past_kvs.size if past_kvs is not None else None}") + # Select next token using pred_score + #next_token = select_next_token(pred_score, top_k=TOP_K, top_p=TOP_P, temperature=TEMP, use_max=False) + next_token = ttg.sample(pred_score, temperature=TEMP, top_k=TOP_K)[:, -1, :] print(f"next_token: {next_token}") # Update generated_ids - generated_ids = torch.cat([generated_ids, next_token.unsqueeze(0)], dim=1) + generated_ids = 
torch.cat([generated_ids, next_token], dim=1) print(f"generated_ids: {generated_ids}") # Check for EOS token - if next_token.item() == tokenizer.eos_token_id: - break + print(f"next_token.item(): {next_token.item()}") + + if config: + print(config["eos_token_id"]) + if next_token.item() in config["eos_token_id"]: + break + else: + if next_token.item() == tokenizer.eos_token_id: + break # Decode generated text generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True) - print(f"\nPrompt: {text}") - print(f"\nGenerated Response: {generated_text}") + print(f"\n\n\n\nGenerated Response: {generated_text}") if __name__ == "__main__": print("\nTesting generation:") @@ -101,7 +106,7 @@ def test_generation(model, tokenizer, text, max_length=10): ) # Initialize tokenizer - tokenizer = AutoTokenizer.from_pretrained(cache_dir) + tokenizer = AutoTokenizer.from_pretrained(shard.model_id) # Initialize LlamaModel with config and tokenizer model = LlamaModel(config, shard) @@ -120,7 +125,7 @@ def test_generation(model, tokenizer, text, max_length=10): model.eval() # Set the model to evaluation mode # Sample text for testing - test_text = "What color is a red apple?" + test_text = "Hello" - test_generation(model, tokenizer, test_text) + test_generation(model, tokenizer, test_text, 5, config) From f1822e292196be72f858475e0fd4f72a4927515f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 30 Oct 2024 03:28:03 -0800 Subject: [PATCH 469/589] fixing kvcache for multiheadattention, fixing layers names for loading weights properly --- exo/inference/torch/models/llama3.py | 10 +++++----- exo/inference/torch/models/llm_utils.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 09c162e6e..13958d06e 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -35,7 +35,7 @@ def __init__( super(LlamaBlock, self).__init__() self.self_attn = self_attn self.mlp = mlp - self.input_layer_norm = RMSNorm(dim, eps=rms_norm_eps) + self.input_layernorm = RMSNorm(dim, eps=rms_norm_eps) self.post_attention_norm = RMSNorm(dim, eps=rms_norm_eps) def forward( @@ -64,8 +64,8 @@ def forward( residual = hidden_states # Apply RMSNorm to input - hidden_states = self.input_layer_norm(hidden_states) - print(f"self.input_layer_norm(hidden_states) {hidden_states.shape}") + hidden_states = self.input_layernorm(hidden_states) + print(f"self.input_layernorm(hidden_states) {hidden_states.shape}") #batch_size, seq_len, _ = hidden_states.shape #hidden_states = hidden_states.view(batch_size, seq_len, self.num_heads, self.head_dim).squeeze() @@ -158,7 +158,7 @@ def __init__(self, config: dict, shard: Shard): ) for _ in range(self.num_layers) ]) self.norm = RMSNorm(self.hidden_size, eps=self.rms_norm_eps) - self.rotary_pos_emb = RotaryEmbedding( + self.rotary_emb = RotaryEmbedding( self.head_dim ) self.lm_head = nn.Linear(self.hidden_size, self.vocab_size, bias=False) @@ -233,7 +233,7 @@ def forward( print(f"position_ids: {position_ids.shape}") # Apply rotary positional embeddings - position_embeddings = self.rotary_pos_emb(hidden_states, position_ids) + position_embeddings = self.rotary_emb(hidden_states, position_ids) # Reshape back to (batch_size, seq_len, hidden_size) print(f"hidden_size: {self.hidden_size}") diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index f43c228b0..459823ca2 100644 --- a/exo/inference/torch/models/llm_utils.py +++ 
b/exo/inference/torch/models/llm_utils.py @@ -360,7 +360,7 @@ def forward( if self.kv_cache is not None: print(f"self.kv_cache.size {self.kv_cache.size}") print(f"key_states.size(0) {key_states.size(2)}") - if self.kv_cache is None or self.kv_cache.batch_size != key_states.size(0): + if self.kv_cache is None or self.kv_cache.size != key_states.size(2): print(f"\n MAKE NEW KVCACHE batch_size={key_states.size(0)} max_seq_len={key_states.size(2)}") self.kv_cache = ttm.KVCache( batch_size=key_states.size(0), From 38028c06183886e9caae7eddd8ed4a0e14c5a76f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 31 Oct 2024 15:47:53 -0800 Subject: [PATCH 470/589] doing work with position_id and causal mask --- exo/inference/torch/models/llama3.py | 33 ++++++++++------- .../torch/tests/test_llama3_model.py | 37 ++++++++++++++++++- 2 files changed, 56 insertions(+), 14 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 13958d06e..045a790b5 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -71,7 +71,7 @@ def forward( #hidden_states = hidden_states.view(batch_size, seq_len, self.num_heads, self.head_dim).squeeze() #print(f"hidden_states: {hidden_states.shape}") - # Apply MultiHeadAttention with KVCache + # Apply MultiHeadAttention with KVCache hidden_states, kv_cache = self.self_attn( hidden_states=hidden_states, position_ids=position_ids, @@ -132,7 +132,7 @@ def __init__(self, config: dict, shard: Shard): self.padding_idx = config.get("pad_token_id") # Model layers and methods, order matters - self.embed = nn.Embedding(self.vocab_size, self.hidden_size, self.padding_idx) + self.embed_tokens = nn.Embedding(self.vocab_size, self.hidden_size, self.padding_idx) self.layers = nn.ModuleList([ LlamaBlock( dim=self.hidden_size, @@ -191,7 +191,7 @@ def forward( batch_size, seq_len = input_ids.shape # Create initial embeddings - input_embeds = self.embed(input_ids) + input_embeds = self.embed_tokens(input_ids) ## Initialize or use the provided KVCache #if past_kv_cache is None: @@ -216,10 +216,17 @@ def forward( print(f"cache_position: {cache_position.shape}") if position_ids is None: - position_ids = cache_position.unsqueeze(0) + #position_ids = cache_position.unsqueeze(0) + position_ids = attention_mask.long().cumsum(-1) - 1 + position_ids.masked_fill_(attention_mask == 0, 1) - print(f"input_embeds: {input_embeds.shape}") - hidden_states = input_embeds + # cache based input generation + if past_kv_cache is not None: + hidden_states = input_embeds[:, -cache_position.shape[0]:] + else: + hidden_states = input_embeds + + print(f"LM hidden_states: {hidden_states.shape}") # Reshape hidden_states to (batch_size, seq_len, num_heads, head_dim) batch_size, seq_len, _ = hidden_states.shape @@ -247,7 +254,7 @@ def forward( target_len = past_kv_cache.size + seq_len + 1 else: target_len = seq_len + 1 - causal_mask = create_4d_causal_attention_mask( + attention_mask = create_4d_causal_attention_mask( attention_mask=attention_mask, seq_len=seq_len, target_len=target_len, @@ -258,7 +265,6 @@ def forward( ) print(f"attention_mask: {attention_mask.shape}") - print(f"causal_mask: {causal_mask.shape}") # Forward pass through layers with KVCache for layer_idx in range(self.shard.start_layer, self.shard.end_layer): @@ -267,7 +273,7 @@ def forward( print(f"encoder_layer\n{encoder_layer}") layer_hidden_state, layer_kv_cache = self.layers[layer_idx]( hidden_states=hidden_states, - attention_mask=causal_mask, + 
attention_mask=attention_mask, position_ids=position_ids, position_embeddings=position_embeddings ) @@ -277,15 +283,16 @@ def forward( print(f"layer_kv_cache: {layer_kv_cache.size}") - # Apply final layer normalization - hidden_states = self.norm(hidden_states) - # Compute prediction score from lm head if at end layer if self.shard.is_last_layer(): - pred_score = self.lm_head(hidden_states) + # Apply final layer normalization + hidden_states = self.norm(hidden_states) + pred_score = self.lm_head(hidden_states[:, -1:, :]) else: pred_score = None + print(f"end attention_mask: {attention_mask.shape}") + if pred_score is None: return pred_score, hidden_states, past_kv_cache else: diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index d61cf5b40..5a79f14d3 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -17,6 +17,24 @@ TOP_K=25 TOP_P=0.9 +def check_weights(model, state_dict): + """ + Verifies that the weights from the state dictionary are properly loaded into the model. + """ + model_state_dict = model.state_dict() + for name, param in model_state_dict.items(): + if name in state_dict: + loaded_param = state_dict[name] + if param.shape != loaded_param.shape: + print(f"Shape mismatch for {name}: expected {param.shape}, got {loaded_param.shape}") + else: + print(f"{name}: loaded correctly") + else: + print(f"{name} not found in the state_dict") + + for name in state_dict: + if name not in model_state_dict: + print(f"Unexpected weight {name} found in state_dict") def test_generation(model, tokenizer, text, max_length=10, config=None): """ @@ -110,6 +128,7 @@ def test_generation(model, tokenizer, text, max_length=10, config=None): # Initialize LlamaModel with config and tokenizer model = LlamaModel(config, shard) + print(f"\nmodel: {model}") # Load weights from safetensors files in the cache directory safetensors_files = list(cache_dir.glob("*.safetensors")) @@ -120,8 +139,24 @@ def test_generation(model, tokenizer, text, max_length=10, config=None): for safetensor_file in safetensors_files: print(f"Loading weights from: {safetensor_file}") state_dict = load_safetensors(safetensor_file) - model.load_state_dict(state_dict, strict=False) + # remap to work with our model + remapped_state_dict = {} + for key, value in state_dict.items(): + # Remove the 'model.' prefix if it exists + print(f"remapping: {key}") + if key.startswith('model.'): + new_key = key[len('model.'):] # Remove 'model.' 
+ else: + new_key = key + + remapped_state_dict[new_key] = value + + model.load_state_dict(remapped_state_dict, strict=False) + + check_weights(model, remapped_state_dict) + + #exit() model.eval() # Set the model to evaluation mode # Sample text for testing From 0fd1797d69f7b1be9e198e903090db0345eb0f9d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 31 Oct 2024 16:04:10 -0800 Subject: [PATCH 471/589] updating torch readme with current model in development --- exo/inference/torch/README.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/exo/inference/torch/README.md b/exo/inference/torch/README.md index 43b3782af..2ac5a7436 100644 --- a/exo/inference/torch/README.md +++ b/exo/inference/torch/README.md @@ -51,3 +51,36 @@ GPU 4: NVIDIA Quadro P400 2GB GPU 5: NVIDIA Quadro P400 2GB ``` +## Current Model + +WIP pytorch llama model + +``` +# Llama-3.2-1B-Instruct # + +LlamaModel( + (embed): Embedding(128256, 2048) + (layers): ModuleList( + (0-15): 16 x LlamaBlock( + (self_attn): SDPAttention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, out_features=512, bias=False) + (v_proj): Linear(in_features=2048, out_features=512, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary_emb): RotaryEmbedding() + ) + (mlp): MultiLayerPreceptron( + (gate_proj): Linear(in_features=2048, out_features=8192, bias=False) + (up_proj): Linear(in_features=2048, out_features=8192, bias=False) + (down_proj): Linear(in_features=8192, out_features=2048, bias=False) + (act_fn): SiLU() + ) + (input_layer_norm): RMSNorm() + (post_attention_norm): RMSNorm() + ) + ) + (norm): RMSNorm() + (rotary_pos_emb): RotaryEmbedding() + (lm_head): Linear(in_features=2048, out_features=128256, bias=False) +) +``` From 5aaffe6f2cc52560113cee2a6af5374617323616 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 2 Nov 2024 09:47:07 -0800 Subject: [PATCH 472/589] implemented using torchtune multiheadattention, added dot product attention but not implemented fully, added RMSNorm from modeling llama on HF, added weight renaming and loading along with handling no lm_head weight in safetensor where you then use embed weight as seen with gpt2, still not generating proper reponses further dev being done --- exo/inference/torch/models/llama3.py | 222 +++++------------ exo/inference/torch/models/llm_utils.py | 224 ++++++++++-------- .../torch/tests/test_llama3_model.py | 52 ++-- 3 files changed, 226 insertions(+), 272 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 045a790b5..5f42e25bf 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -7,18 +7,12 @@ import torch import torch.nn as nn -from torchtune.modules import ( - KVCache, - RMSNorm -) +import torchtune.modules as ttm from exo.inference.shard import Shard from exo.inference.torch.models.llm_utils import ( MultiLayerPreceptron, - #MultiHeadAttention, - SDPAttention, - RotaryEmbedding, - create_4d_causal_attention_mask + RMSNorm ) class LlamaBlock(nn.Module): @@ -27,39 +21,49 @@ class LlamaBlock(nn.Module): """ def __init__( self, - dim, + config, mlp, self_attn, rms_norm_eps=1e-6 ): super(LlamaBlock, self).__init__() + self.config = config self.self_attn = self_attn self.mlp = mlp - self.input_layernorm = RMSNorm(dim, eps=rms_norm_eps) - self.post_attention_norm = RMSNorm(dim, eps=rms_norm_eps) + self.input_layernorm = RMSNorm(self.config['hidden_size'], 
eps=rms_norm_eps) + self.post_attention_layernorm = RMSNorm(self.config['hidden_size'], eps=rms_norm_eps) def forward( self, hidden_states: torch.Tensor, - position_embeddings: Tuple[torch.Tensor, torch.Tensor], - kv_cache: Optional[KVCache] = None, - attention_mask: Optional[torch.Tensor] = None, + attention_mask: torch.Tensor, position_ids: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, Optional[KVCache]]: + ) -> torch.Tensor: """ Forward pass with integrated attention, resnet and key-value caching. Args: hidden_states (torch.Tensor): Input tensor of shape (batch_size, seq_len, dim). - kv_cache (Optional[KVCache]): KVCache object for managing past key-value states. - attention_mask (Optional[torch.Tensor]): Attention mask of shape (batch_size, 1, 1, seq_len). position_ids (Optional[torch.Tensor]): Position IDs tensor of shape (batch_size, seq_len). Returns: Tuple[torch.Tensor, KVCache]: - Output tensor of shape (batch_size, seq_len, dim). - - Updated KVCache object. """ + if isinstance(self.self_attn, ttm.MultiHeadAttention): + if self.self_attn.kv_cache is None: + # setup cache + self.self_attn.setup_cache( + batch_size=hidden_states.size(0), + dtype=hidden_states.dtype, + max_seq_len=2048, #self.config['max_position_embeddings'] + ) + + # Reshape `attention_mask` to match the expected shape: [batch_size, seq_len, seq_len] + if attention_mask is not None: + attention_mask = attention_mask[:, None, :].expand(-1, hidden_states.size(1), -1).float() + print(f"reshaped attention_mask: {attention_mask.shape}") + # setting up resnet residual = hidden_states @@ -67,16 +71,10 @@ def forward( hidden_states = self.input_layernorm(hidden_states) print(f"self.input_layernorm(hidden_states) {hidden_states.shape}") - #batch_size, seq_len, _ = hidden_states.shape - #hidden_states = hidden_states.view(batch_size, seq_len, self.num_heads, self.head_dim).squeeze() - #print(f"hidden_states: {hidden_states.shape}") - - # Apply MultiHeadAttention with KVCache - hidden_states, kv_cache = self.self_attn( - hidden_states=hidden_states, - position_ids=position_ids, - attention_mask=attention_mask, - position_embeddings=position_embeddings + hidden_states = self.self_attn( + x=hidden_states, + #mask=attention_mask, + input_pos=position_ids ) # Residual connection @@ -85,19 +83,19 @@ def forward( print(f"hidden_states: {hidden_states.shape}") print(f"residual: {residual.shape}") # Post attention normalization - hidden_states = self.post_attention_norm(hidden_states) + hidden_states = self.post_attention_layernorm(hidden_states) # Feed-forward network with MLP and residual connection hidden_states = self.mlp(hidden_states) hidden_states = hidden_states + residual - return hidden_states, kv_cache + return hidden_states class LlamaModel(nn.Module): """ LlamaModel is a pure PyTorch implementation of the LLaMA architecture """ - def __init__(self, config: dict, shard: Shard): + def __init__(self, config: dict, shard: Shard, is_causal=True): """ Initialize the LlamaModel. 
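Throughout these patches the forward pass stops taking position ids as an input and instead derives them from the padding mask with a cumulative sum. A minimal, standalone sketch of that trick (illustrative tensors only, not part of the patch):

```python
import torch

# Right-padded batch: 1 marks a real token, 0 marks padding.
attention_mask = torch.tensor([[1, 1, 1, 1, 0, 0]])

# Running count of real tokens, shifted so the first token is position 0.
position_ids = attention_mask.long().cumsum(-1) - 1   # [[0, 1, 2, 3, 3, 3]]

# Give padding slots a harmless placeholder position.
position_ids.masked_fill_(attention_mask == 0, 1)     # [[0, 1, 2, 3, 1, 1]]
```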
@@ -129,37 +127,42 @@ def __init__(self, config: dict, shard: Shard): self.rms_norm_eps = config['rms_norm_eps'] self.head_dim = config['head_dim'] self.attention_dropout = config.get('attention_dropout', 0.0) + self.attention_bias = config.get('attention_bias', False) self.padding_idx = config.get("pad_token_id") + self.has_lm_head_weight = False # Model layers and methods, order matters - self.embed_tokens = nn.Embedding(self.vocab_size, self.hidden_size, self.padding_idx) + self.embed_tokens = nn.Embedding(self.vocab_size, self.hidden_size, self.padding_idx) self.layers = nn.ModuleList([ LlamaBlock( - dim=self.hidden_size, + config=self.config, rms_norm_eps=self.rms_norm_eps, - self_attn=SDPAttention( - hidden_size=self.hidden_size, + self_attn=ttm.MultiHeadAttention( + embed_dim=self.hidden_size, num_heads=self.num_heads, - num_kv_heads=self.num_kv_heads, - head_dim=self.hidden_size // self.num_heads, - is_causal=True, - attention_dropout=self.attention_dropout, - rotary_emb=RotaryEmbedding( - self.head_dim - ), - attention_bias=config.get('attention_bias', False) + num_kv_heads=self.num_heads, + head_dim= self.hidden_size // self.num_heads, + q_proj=nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=self.attention_bias), + k_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=self.attention_bias), + v_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=self.attention_bias), + output_proj=nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=self.attention_bias), + max_seq_len=2048, #self.max_position_embeddings, + is_causal=is_causal, + attn_dropout=self.attention_dropout ), mlp=MultiLayerPreceptron( input_dim=self.hidden_size, hidden_dim=self.intermediate_size, activation=self.config.get("hidden_act", "silu"), use_bias=self.config.get("mlp_bias", False) - ), + ) ) for _ in range(self.num_layers) ]) - self.norm = RMSNorm(self.hidden_size, eps=self.rms_norm_eps) - self.rotary_emb = RotaryEmbedding( - self.head_dim + self.norm = RMSNorm(hidden_size=self.hidden_size, eps=self.rms_norm_eps) + self.rotary_emb = ttm.RotaryPositionalEmbeddings( + dim=self.hidden_size // self.num_heads, + max_seq_len=2048, #self.max_position_embeddings, + base=self.config.get('rope_theta', 10000) ) self.lm_head = nn.Linear(self.hidden_size, self.vocab_size, bias=False) @@ -168,132 +171,33 @@ def forward( input_ids: torch.Tensor, attention_mask: torch.Tensor, position_ids: Optional[torch.Tensor] = None, - cache_position: Optional[torch.Tensor] = None, - past_kv_cache: Optional[KVCache] = None, - ) -> Tuple[Optional[torch.Tensor], Optional[Tuple[torch.Tensor]], Optional[KVCache]]: - """ - Forward pass with integrated position ID handling, attention mask, and optional KVCache. - - Args: - input_ids (torch.Tensor): Input token IDs of shape (batch_size, seq_len). - attention_mask (Optional[torch.Tensor]): Attention mask of shape (batch_size, seq_len). - position_ids (Optional[torch.Tensor]): Position IDs. If None, they are calculated automatically. - cache_position (Optional[torch.LongTensor]): the positions of inputs in the sequence - past_kv_cache (Optional[KVCache]): Optional KVCache for efficient generation. - If provided, it stores past key-value states for faster autoregressive inference. - - Returns: - Tuple[torch.Tensor, Optional[Tuple[torch.Tensor]], KVCache]: - - pred_score (Optional[torch.Tensor]): Prediction scores from lm_head of model. 
- - hidden_states (Optional[torch.Tensor]): Hidden states from each layer - - past_kv_cache (KVCache): Updated KVCache object. - """ - batch_size, seq_len = input_ids.shape + ) -> Tuple[Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + _, seq_len = input_ids.shape - # Create initial embeddings input_embeds = self.embed_tokens(input_ids) - ## Initialize or use the provided KVCache - #if past_kv_cache is None: - # past_kv_cache = KVCache( - # batch_size=batch_size, - # max_seq_len=self.max_position_embeddings, - # num_heads=self.num_heads, - # head_dim=self.head_dim, - # dtype=input_embeds.dtype - # ) - - # Initialize position IDs if not provided - if cache_position is None: - past_seen_tokens = past_kv_cache.size if past_kv_cache is not None else 0 - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + input_embeds.shape[1], - device=input_ids.device - ) - #.unsqueeze(0).expand(batch_size, -1) - - print(f"cache_position: {cache_position.shape}") - if position_ids is None: - #position_ids = cache_position.unsqueeze(0) position_ids = attention_mask.long().cumsum(-1) - 1 position_ids.masked_fill_(attention_mask == 0, 1) + position_ids = position_ids[:, -seq_len:] - # cache based input generation - if past_kv_cache is not None: - hidden_states = input_embeds[:, -cache_position.shape[0]:] - else: - hidden_states = input_embeds - - print(f"LM hidden_states: {hidden_states.shape}") - - # Reshape hidden_states to (batch_size, seq_len, num_heads, head_dim) - batch_size, seq_len, _ = hidden_states.shape - hidden_states = hidden_states.view(batch_size, seq_len, self.num_heads, self.head_dim) + print(f"LM input_embeds: {input_embeds.shape}") + print(f"LM attention_mask: {attention_mask.shape}") - # Reshape position_ids to match (batch_size, seq_len) - if position_ids.dim() != 2: - position_ids = position_ids.squeeze(0) + hidden_states = input_embeds - print(f"hidden_states: {hidden_states.shape}") - print(f"position_ids: {position_ids.shape}") - - # Apply rotary positional embeddings - position_embeddings = self.rotary_emb(hidden_states, position_ids) - - # Reshape back to (batch_size, seq_len, hidden_size) - print(f"hidden_size: {self.hidden_size}") - hidden_states = hidden_states.view(batch_size, seq_len, self.hidden_size) - print(f"hidden_states: {hidden_states.shape}") - - # create/update 4d causal mask - seq_len = input_embeds.shape[1] - - if past_kv_cache is not None: - target_len = past_kv_cache.size + seq_len + 1 - else: - target_len = seq_len + 1 - attention_mask = create_4d_causal_attention_mask( - attention_mask=attention_mask, - seq_len=seq_len, - target_len=target_len, - dtype=input_embeds.dtype, - device=input_embeds.device, - cache_pos=cache_position, - batch_size=input_embeds.size(0) - ) - - print(f"attention_mask: {attention_mask.shape}") - - # Forward pass through layers with KVCache for layer_idx in range(self.shard.start_layer, self.shard.end_layer): - print(f"forward layer #{layer_idx}") - encoder_layer = self.layers[layer_idx] - print(f"encoder_layer\n{encoder_layer}") - layer_hidden_state, layer_kv_cache = self.layers[layer_idx]( - hidden_states=hidden_states, + #print(f"forward layer #{layer_idx}") + #print(f"{self.layers[layer_idx]}") + hidden_states = self.layers[layer_idx]( + hidden_states=input_embeds, attention_mask=attention_mask, position_ids=position_ids, - position_embeddings=position_embeddings ) - hidden_states = layer_hidden_state - past_kv_cache = layer_kv_cache - - print(f"layer_kv_cache: {layer_kv_cache.size}") - - # Compute prediction 
score from lm head if at end layer if self.shard.is_last_layer(): - # Apply final layer normalization - hidden_states = self.norm(hidden_states) - pred_score = self.lm_head(hidden_states[:, -1:, :]) - else: - pred_score = None + pred_score = self.lm_head(self.norm(hidden_states)[:, -1:, :]) - print(f"end attention_mask: {attention_mask.shape}") + return pred_score, None - if pred_score is None: - return pred_score, hidden_states, past_kv_cache - else: - return pred_score, None, past_kv_cache + return None, hidden_states diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 459823ca2..b4c0658ad 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -11,8 +11,6 @@ import torchtune.modules as ttm import math -from transformers.models.mamba.modeling_mamba import causal_conv1d_update - from exo.helpers import DEBUG def load_model_config(model_config_path: Path) -> dict: @@ -90,60 +88,6 @@ def select_next_token( return next_token -class MultiLayerPreceptron(nn.Module): - def __init__( - self, - input_dim, - hidden_dim, - activation='gelu', - use_bias=False - ): - """ - General MLP (Multi-Layer Perceptron) module. - - Args: - input_dim (int): Dimensionality of the input. - hidden_dims (int): Hidden layer/intermediate dimensions. - output_dim (int): Dimensionality of the output. - activation (str): Activation function ('relu', 'gelu', 'tanh', 'sigmoid', etc.). - dropout (float): Dropout probability. - use_batchnorm (bool): Whether to use batch normalization. - """ - super(MultiLayerPreceptron, self).__init__() - - # Activation function mapping - activations = { - 'relu': nn.ReLU(), - 'gelu': nn.GELU(), - 'tanh': nn.Tanh(), - 'sigmoid': nn.Sigmoid(), - 'leaky_relu': nn.LeakyReLU(0.2), - 'silu': nn.SiLU() - } - - # Ensure valid activation - if activation not in activations: - raise ValueError(f"Invalid activation: {activation}. Choose from {list(activations.keys())}") - - # Construct MLP layers - self.gate_proj = nn.Linear(input_dim, hidden_dim, bias=use_bias) - self.up_proj = nn.Linear(input_dim, hidden_dim, bias=use_bias) - self.down_proj = nn.Linear(hidden_dim, input_dim, bias=use_bias) - self.act_fn = activations[activation] - - def forward(self, x) -> torch.Tensor: - """ - Forward pass for the MLP module. - - Args: - x (torch.Tensor): Input tensor. - - Returns: - torch.Tensor: Output tensor after the MLP transformations. - """ - down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x)) - return down_proj - def create_4d_causal_attention_mask( attention_mask: torch.Tensor, seq_len: int, @@ -269,6 +213,83 @@ def forward(self, x, position_ids) -> Tuple[torch.Tensor, torch.Tensor]: return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) +class MultiLayerPreceptron(nn.Module): + def __init__( + self, + input_dim, + hidden_dim, + activation='silu', + use_bias=False + ): + """ + General MLP (Multi-Layer Perceptron) module. + + Args: + input_dim (int): Dimensionality of the input. + hidden_dims (int): Hidden layer/intermediate dimensions. + output_dim (int): Dimensionality of the output. + activation (str): Activation function ('relu', 'gelu', 'tanh', 'sigmoid', etc.). + dropout (float): Dropout probability. + use_batchnorm (bool): Whether to use batch normalization. 
+ """ + super(MultiLayerPreceptron, self).__init__() + + # Activation function mapping + activations = { + 'relu': nn.ReLU(), + 'gelu': nn.GELU(), + 'tanh': nn.Tanh(), + 'sigmoid': nn.Sigmoid(), + 'leaky_relu': nn.LeakyReLU(0.2), + 'silu': nn.SiLU() + } + + # Ensure valid activation + if activation not in activations: + raise ValueError(f"Invalid activation: {activation}. Choose from {list(activations.keys())}") + + # Construct MLP layers + self.gate_proj = nn.Linear(input_dim, hidden_dim, bias=use_bias) + self.up_proj = nn.Linear(input_dim, hidden_dim, bias=use_bias) + self.down_proj = nn.Linear(hidden_dim, input_dim, bias=use_bias) + self.act_fn = activations[activation] + + def forward(self, x) -> torch.Tensor: + """ + Forward pass for the MLP module. + + Args: + x (torch.Tensor): Input tensor. + + Returns: + torch.Tensor: Output tensor after the MLP transformations. + """ + + return self.down_proj( + self.act_fn( + self.gate_proj(x) + ) * self.up_proj(x) + ) + +class RMSNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-6): + """ + RMSNorm + """ + super().__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.variance_epsilon = eps + + def forward(self, hidden_states): + input_dtype = hidden_states.dtype + hidden_states = hidden_states.to(torch.float32) + variance = hidden_states.pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) + return self.weight * hidden_states.to(input_dtype) + + def extra_repr(self): + return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}" + # ------------------ # Attention Methods # ------------------ @@ -289,7 +310,6 @@ def __init__( num_kv_heads, head_dim, rotary_emb, - kv_cache: Optional[ttm.KVCache] = None, attention_dropout=0.0, is_causal=True, attention_bias=False @@ -301,7 +321,6 @@ def __init__( self.head_dim = head_dim self.attention_dropout = attention_dropout self.is_causal = is_causal - self.kv_cache = kv_cache # nn layers self.q_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=attention_bias) @@ -316,8 +335,9 @@ def forward( position_ids: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + kv_cache: Optional[ttm.KVCache] = None, cos_sin_unsqueeze: int=1 - ) -> Tuple[torch.Tensor, ttm.KVCache]: + ) -> Tuple[torch.Tensor, Optional[ttm.KVCache]]: batch_size, seq_len, _ = hidden_states.size() # Project to queries, keys, and values @@ -357,22 +377,25 @@ def forward( print(f"value_states: {value_states.shape}") # Forcing caching always enabled - if self.kv_cache is not None: - print(f"self.kv_cache.size {self.kv_cache.size}") - print(f"key_states.size(0) {key_states.size(2)}") - if self.kv_cache is None or self.kv_cache.size != key_states.size(2): - print(f"\n MAKE NEW KVCACHE batch_size={key_states.size(0)} max_seq_len={key_states.size(2)}") - self.kv_cache = ttm.KVCache( - batch_size=key_states.size(0), - max_seq_len=key_states.size(2), - num_heads=self.num_kv_heads, - head_dim=self.head_dim, - dtype=hidden_states.dtype - ) - key_states, value_states = self.kv_cache.update(key_states, value_states) - print(f"kv_cache: {self.kv_cache.size}") - print(f"key_states: {key_states.shape}") - print(f"value_states: {value_states.shape}") + if kv_cache is not None: + #print(f"kv_cache.size {kv_cache.size}") + + #print(f"key_states.size(2) {key_states.size(2)}") + + #if kv_cache.size != key_states.size(2): + # print(f"\n MAKE NEW KVCACHE batch_size={key_states.size(0)} 
max_seq_len={key_states.size(2)}") + # kv_cache = ttm.KVCache( + # batch_size=key_states.size(0), + # max_seq_len=key_states.size(2), + # num_heads=self.num_kv_heads, + # head_dim=self.head_dim, + # dtype=hidden_states.dtype + # ) + + key_states, value_states = kv_cache.update(key_states, value_states) + print(f"kv_cache: {kv_cache.size}") + print(f"key_states: {key_states.shape}") + print(f"value_states: {value_states.shape}") # Repeat keys and values if needed #if self.num_heads > self.num_kv_heads: @@ -417,7 +440,7 @@ def forward( attn_output = self.o_proj(attn_output) print(f"attn_output: {attn_output.shape}") - return attn_output, self.kv_cache + return attn_output, kv_cache class SDPAttention(nn.Module): """ @@ -434,7 +457,6 @@ def __init__( num_kv_heads, head_dim, rotary_emb, - kv_cache: Optional[ttm.KVCache] = None, attention_dropout=0.0, is_causal=True, attention_bias=False @@ -446,7 +468,6 @@ def __init__( self.head_dim = head_dim self.attention_dropout = attention_dropout self.is_causal = is_causal - self.kv_cache = kv_cache # nn layers self.q_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=attention_bias) @@ -461,8 +482,9 @@ def forward( position_ids: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + kv_cache: Optional[ttm.KVCache] = None, cos_sin_unsqueeze: int=1 - ) -> Tuple[torch.Tensor, ttm.KVCache]: + ) -> Tuple[torch.Tensor, Optional[ttm.KVCache]]: batch_size, seq_len, _ = hidden_states.size() # Project to queries, keys, and values @@ -502,22 +524,30 @@ def forward( print(f"value_states: {value_states.shape}") # Forcing caching always enabled - if self.kv_cache is not None: - print(f"self.kv_cache.size {self.kv_cache.size}") - print(f"key_states.size(0) {key_states.size(2)}") - if self.kv_cache is None or self.kv_cache.size != key_states.size(2): - print(f"\n MAKE NEW KVCACHE batch_size={key_states.size(0)} max_seq_len={key_states.size(2)}") - self.kv_cache = ttm.KVCache( - batch_size=key_states.size(0), - max_seq_len=key_states.size(2), - num_heads=self.num_kv_heads, - head_dim=self.head_dim, - dtype=hidden_states.dtype - ) - key_states, value_states = self.kv_cache.update(key_states, value_states) - print(f"kv_cache: {self.kv_cache.size}") - print(f"from kv_cache / key_states: {key_states.shape}") - print(f"from kv_cache / value_states: {value_states.shape}") + if kv_cache is not None: + #print(f"kv_cache.size {kv_cache.size}") + #print(f"key_states.size(0) {key_states.size(2)}") + + #if kv_cache.size != key_states.size(2): + # print(f"\n MAKE NEW KVCACHE batch_size={key_states.size(0)} max_seq_len={key_states.size(2)}") + # kv_cache = ttm.KVCache( + # batch_size=key_states.size(0), + # max_seq_len=key_states.size(2), + # num_heads=self.num_kv_heads, + # head_dim=self.head_dim, + # dtype=hidden_states.dtype + # ) + + key_states, value_states = kv_cache.update(key_states, value_states) + + # **Slice KVCache to match `query_states` length** + key_states = key_states[:, :, :seq_len, :] + value_states = value_states[:, :, :seq_len, :] + + # kv_cache.update(key_states, value_states) + print(f"kv_cache: {kv_cache.size}") + print(f"from kv_cache / key_states: {key_states.shape}") + print(f"from kv_cache / value_states: {value_states.shape}") # Repeat keys and values if needed #if self.num_heads > self.num_kv_heads: @@ -550,7 +580,7 @@ def forward( key_states, value_states, attn_mask=causal_mask, - dropout_p=self.attention_dropout if self.training else 0.0, + dropout_p=0.0, 
is_causal=is_causal, ) @@ -563,5 +593,5 @@ def forward( print(f"attn_output: {attn_output.shape}") - return attn_output, self.kv_cache + return attn_output, kv_cache diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index 5a79f14d3..b5f07d008 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -1,6 +1,7 @@ """ Test of pytorch based llama3 model """ +import re from pathlib import Path import torch @@ -14,7 +15,7 @@ MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct" TEMP=0.7 -TOP_K=25 +TOP_K=35 TOP_P=0.9 def check_weights(model, state_dict): @@ -22,6 +23,7 @@ def check_weights(model, state_dict): Verifies that the weights from the state dictionary are properly loaded into the model. """ model_state_dict = model.state_dict() + print(f"model_state_dict: {model_state_dict.keys()}") for name, param in model_state_dict.items(): if name in state_dict: loaded_param = state_dict[name] @@ -29,8 +31,8 @@ def check_weights(model, state_dict): print(f"Shape mismatch for {name}: expected {param.shape}, got {loaded_param.shape}") else: print(f"{name}: loaded correctly") - else: - print(f"{name} not found in the state_dict") + #else: + # print(f"{name} not found in the state_dict") for name in state_dict: if name not in model_state_dict: @@ -65,44 +67,46 @@ def test_generation(model, tokenizer, text, max_length=10, config=None): generated_ids = input_ids.clone() # Generate tokens step-by-step - past_kvs = None - print(f"{model}") for _ in range(max_length): with torch.no_grad(): - pred_score, hstates, past_kvs = model( + pred_score, hstates = model( generated_ids, - attention_mask=attention_mask, - past_kv_cache=past_kvs + attention_mask=attention_mask ) + print("\n\n------------------------------------------------------") print(f"pred_score: {pred_score.shape}") print(f"hstates: {hstates.shape if hstates is not None else None}") - print(f"past_kvs: {past_kvs.size if past_kvs is not None else None}") # Select next token using pred_score - #next_token = select_next_token(pred_score, top_k=TOP_K, top_p=TOP_P, temperature=TEMP, use_max=False) - next_token = ttg.sample(pred_score, temperature=TEMP, top_k=TOP_K)[:, -1, :] + next_token = select_next_token(pred_score, top_k=TOP_K, top_p=TOP_P, temperature=TEMP, use_max=False) + #next_token = ttg.sample(pred_score, temperature=TEMP, top_k=TOP_K)[:, -1, :] print(f"next_token: {next_token}") # Update generated_ids generated_ids = torch.cat([generated_ids, next_token], dim=1) - print(f"generated_ids: {generated_ids}") + print(f"generated_ids: {generated_ids.shape}") + + # Update attention mask + #attention_mask = torch.cat([attention_mask, torch.ones((attention_mask.size(0), 1), device=attention_mask.device)], dim=1) + print(f"attention_mask: {attention_mask.shape}") # Check for EOS token print(f"next_token.item(): {next_token.item()}") if config: - print(config["eos_token_id"]) if next_token.item() in config["eos_token_id"]: break else: if next_token.item() == tokenizer.eos_token_id: break - # Decode generated text - generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True) - print(f"\n\n\n\nGenerated Response: {generated_text}") + # Decode generated text + generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True) + print(f"\n\n\n\nGenerated Response: {generated_text}") + + print("\n\n------------------------------------------------------") if __name__ == "__main__": print("\nTesting generation:") @@ -142,6 
+146,7 @@ def test_generation(model, tokenizer, text, max_length=10, config=None): # remap to work with our model remapped_state_dict = {} + tied_embed_weight = None for key, value in state_dict.items(): # Remove the 'model.' prefix if it exists print(f"remapping: {key}") @@ -150,8 +155,23 @@ def test_generation(model, tokenizer, text, max_length=10, config=None): else: new_key = key + # change o_proj to output_proj + re_o_proj = re.findall(r'layers.(\d+).(\w+).(o_proj).(\w+)', new_key) + if len(re_o_proj) != 0: + new_key = f"layers.{re_o_proj[0][0]}.{re_o_proj[0][1]}.output_proj.weight" + remapped_state_dict[new_key] = value + # saving embed for tied weights + if new_key == 'embed_tokens.weight': + tied_embed_weight = value + + if new_key == 'lm_head.weight': + model.has_lm_head_weight = True + + if not model.has_lm_head_weight: + remapped_state_dict['lm_head.weight'] = tied_embed_weight + model.load_state_dict(remapped_state_dict, strict=False) check_weights(model, remapped_state_dict) From b2b63c3c863fca640c75237775699a3856da1d3b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 3 Nov 2024 04:55:36 -0900 Subject: [PATCH 473/589] FINALLY A WORKING PYTORCH ONLY MODEL, working on logit gen, shard testing and then inference engine testing but we are almost there. HELL YEAAAAAAAAAAA --- exo/inference/torch/models/llama3.py | 238 ++++++++-------- exo/inference/torch/models/llm_utils.py | 200 +++++++++----- .../torch/tests/test_llama3_model.py | 257 +++++++++--------- 3 files changed, 381 insertions(+), 314 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 5f42e25bf..2feb5e189 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -3,11 +3,13 @@ Written with pytorch using torchtune and other methods """ -from typing import Optional, Tuple +from typing import Tuple, List import torch import torch.nn as nn import torchtune.modules as ttm +import torchtune.generation as ttg +from torchtune.models.llama3_1 import Llama3ScaledRoPE from exo.inference.shard import Shard from exo.inference.torch.models.llm_utils import ( @@ -37,7 +39,7 @@ def forward( self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - position_ids: Optional[torch.Tensor] = None + max_seq_len: int = 2048 ) -> torch.Tensor: """ Forward pass with integrated attention, resnet and key-value caching. 
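The next hunk moves mask handling onto torchtune's helpers: the block derives a causal mask and position ids straight from the boolean padding mask. A rough sketch of those two calls as they are used in this patch (behavior assumed from torchtune's `generation` utilities; values are illustrative):

```python
import torch
import torchtune.generation as ttg

# Boolean padding mask for one right-padded sequence (True = real token).
padding_mask = torch.tensor([[1, 1, 1, 0]]).bool()

# Causal (lower-triangular) mask combined with the padding information,
# expanded to the sequence length the attention layer caches against.
causal_mask = ttg.get_causal_mask_from_padding_mask(padding_mask, 8)

# Per-token positions that count only the real (non-padded) tokens.
position_ids = ttg.get_position_ids_from_padding_mask(padding_mask)
```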
@@ -56,148 +58,146 @@ def forward( self.self_attn.setup_cache( batch_size=hidden_states.size(0), dtype=hidden_states.dtype, - max_seq_len=2048, #self.config['max_position_embeddings'] + max_seq_len=max_seq_len, #self.config['max_position_embeddings'] ) - # Reshape `attention_mask` to match the expected shape: [batch_size, seq_len, seq_len] - if attention_mask is not None: - attention_mask = attention_mask[:, None, :].expand(-1, hidden_states.size(1), -1).float() - print(f"reshaped attention_mask: {attention_mask.shape}") - - # setting up resnet - residual = hidden_states - # Apply RMSNorm to input hidden_states = self.input_layernorm(hidden_states) print(f"self.input_layernorm(hidden_states) {hidden_states.shape}") + # get causal mask from attention mask + causal_mask = ttg.get_causal_mask_from_padding_mask( + attention_mask.bool(), + max_seq_len + ) + + print(f"causal_mask: {causal_mask.shape}") + + # get position_ids from attention mask + position_ids = ttg.get_position_ids_from_padding_mask( + attention_mask.bool() + ) + + print(f"position_ids: {position_ids.shape}") + hidden_states = self.self_attn( x=hidden_states, - #mask=attention_mask, - input_pos=position_ids + y=hidden_states, + mask=causal_mask, + #input_pos=position_ids ) # Residual connection - hidden_states = residual + hidden_states - residual = hidden_states print(f"hidden_states: {hidden_states.shape}") - print(f"residual: {residual.shape}") # Post attention normalization hidden_states = self.post_attention_layernorm(hidden_states) # Feed-forward network with MLP and residual connection hidden_states = self.mlp(hidden_states) - hidden_states = hidden_states + residual return hidden_states -class LlamaModel(nn.Module): +def LlamaModel( + config: dict, + shard: Shard, + is_causal: bool=True, + max_seq_len: int=4096 +): """ - LlamaModel is a pure PyTorch implementation of the LLaMA architecture + LlamaModel using torchtune """ - - def __init__(self, config: dict, shard: Shard, is_causal=True): - """ - Initialize the LlamaModel. - - Args: - config (dict): Configuration dictionary containing model parameters. - - hidden_size (int): Size of the hidden layers. - - num_hidden_layers (int): Number of transformer layers. - - num_attention_heads (int): Number of attention heads. - - intermediate_size (int): Size of the intermediate (feed-forward) layers. - - vocab_size (int): Vocabulary size for the embedding layer. - - max_position_embeddings (int): Maximum number of positional embeddings. - - rms_norm_eps (float): Epsilon for RMS normalization. - - head_dim (int): Dimension of each attention head. - - attention_dropout (float): Dropout rate for attention layers. 
- """ - super(LlamaModel, self).__init__() - - self.shard = shard - - # Load configurations from config - self.config = config - self.hidden_size = config['hidden_size'] - self.num_layers = config['num_hidden_layers'] - self.num_heads = config['num_attention_heads'] - self.num_kv_heads = config['num_key_value_heads'] - self.intermediate_size = config['intermediate_size'] - self.vocab_size = config['vocab_size'] - self.max_position_embeddings = config['max_position_embeddings'] - self.rms_norm_eps = config['rms_norm_eps'] - self.head_dim = config['head_dim'] - self.attention_dropout = config.get('attention_dropout', 0.0) - self.attention_bias = config.get('attention_bias', False) - self.padding_idx = config.get("pad_token_id") - self.has_lm_head_weight = False - - # Model layers and methods, order matters - self.embed_tokens = nn.Embedding(self.vocab_size, self.hidden_size, self.padding_idx) - self.layers = nn.ModuleList([ - LlamaBlock( - config=self.config, - rms_norm_eps=self.rms_norm_eps, - self_attn=ttm.MultiHeadAttention( - embed_dim=self.hidden_size, - num_heads=self.num_heads, - num_kv_heads=self.num_heads, - head_dim= self.hidden_size // self.num_heads, - q_proj=nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=self.attention_bias), - k_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=self.attention_bias), - v_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=self.attention_bias), - output_proj=nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=self.attention_bias), - max_seq_len=2048, #self.max_position_embeddings, - is_causal=is_causal, - attn_dropout=self.attention_dropout - ), - mlp=MultiLayerPreceptron( - input_dim=self.hidden_size, - hidden_dim=self.intermediate_size, - activation=self.config.get("hidden_act", "silu"), - use_bias=self.config.get("mlp_bias", False) - ) - ) for _ in range(self.num_layers) - ]) - self.norm = RMSNorm(hidden_size=self.hidden_size, eps=self.rms_norm_eps) - self.rotary_emb = ttm.RotaryPositionalEmbeddings( - dim=self.hidden_size // self.num_heads, - max_seq_len=2048, #self.max_position_embeddings, - base=self.config.get('rope_theta', 10000) + print(shard) + + # Load configurations from config + rope_scaling = config.get("rope_scaling") + hidden_head_dim = config["hidden_size"] // config["num_attention_heads"] + + # Model layers and methods, order matters + embed_tokens = nn.Embedding( + config["vocab_size"], + config["hidden_size"] + ) + + layers = [] + for _ in range(shard.n_layers): + pos_embeddings = Llama3ScaledRoPE( + dim=hidden_head_dim, + max_seq_len=max_seq_len, + base=config.get('rope_theta', 10000), + scale_factor=rope_scaling['factor'] if rope_scaling else 32 ) - self.lm_head = nn.Linear(self.hidden_size, self.vocab_size, bias=False) - - def forward( - self, - input_ids: torch.Tensor, - attention_mask: torch.Tensor, - position_ids: Optional[torch.Tensor] = None, - ) -> Tuple[Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: - _, seq_len = input_ids.shape - input_embeds = self.embed_tokens(input_ids) - - if position_ids is None: - position_ids = attention_mask.long().cumsum(-1) - 1 - position_ids.masked_fill_(attention_mask == 0, 1) - position_ids = position_ids[:, -seq_len:] - - print(f"LM input_embeds: {input_embeds.shape}") - print(f"LM attention_mask: {attention_mask.shape}") + self_attn = ttm.MultiHeadAttention( + embed_dim=config["hidden_size"], + num_heads=config["num_attention_heads"], + num_kv_heads=config["num_key_value_heads"], + 
head_dim=hidden_head_dim, + q_proj=nn.Linear( + config["hidden_size"], + config["num_attention_heads"] * config["head_dim"], + bias=config.get('attention_bias', False) + ), + k_proj = nn.Linear( + config["hidden_size"], + config["num_key_value_heads"] * config["head_dim"], + bias=config.get('attention_bias', False) + ), + v_proj = nn.Linear( + config["hidden_size"], + config["num_key_value_heads"] * config["head_dim"], + bias=config.get('attention_bias', False) + ), + output_proj=nn.Linear( + config["hidden_size"], + config["hidden_size"], + bias=config.get('attention_bias', False) + ), + max_seq_len=max_seq_len, + is_causal=is_causal, + attn_dropout=config.get('attention_dropout', 0.0), + pos_embeddings=pos_embeddings + ) - hidden_states = input_embeds + mlp = MultiLayerPreceptron( + config["hidden_size"], + config['intermediate_size'], + 'silu' + ) - for layer_idx in range(self.shard.start_layer, self.shard.end_layer): - #print(f"forward layer #{layer_idx}") - #print(f"{self.layers[layer_idx]}") - hidden_states = self.layers[layer_idx]( - hidden_states=input_embeds, - attention_mask=attention_mask, - position_ids=position_ids, - ) + layer = ttm.TransformerSelfAttentionLayer( + attn=self_attn, + mlp=mlp, + sa_norm=RMSNorm(config["hidden_size"], eps=config["rms_norm_eps"]), + mlp_norm=RMSNorm(config["hidden_size"], eps=config["rms_norm_eps"]) + ) - if self.shard.is_last_layer(): - pred_score = self.lm_head(self.norm(hidden_states)[:, -1:, :]) + layers.append(layer) + + return ttm.TransformerDecoder( + tok_embeddings=embed_tokens, + layers=nn.ModuleList(layers), + max_seq_len=max_seq_len, + num_heads=config["num_attention_heads"], + head_dim=config["head_dim"], + norm=RMSNorm(config["hidden_size"], eps=config["rms_norm_eps"]), + output=nn.Linear(config["hidden_size"], config["vocab_size"]), + num_layers=shard.n_layers, + #output_hidden_states=list(range(shard.start_layer, shard.end_layer)) + ) + +class ShardedLlamaModel(nn.Module): + def __init__(self, config: dict, shard: Shard, is_causal=True): + super(ShardedLlamaModel, self).__init__() - return pred_score, None + self.shard = shard + self.config = config + self.model = LlamaModel(config, shard, is_causal) - return None, hidden_states + def generate( + self, + prompt: torch.Tensor + ): + """ + move login being done in test_llama3_model for generation to here + along with test sharding + """ + pass diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index b4c0658ad..0946890ed 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -1,9 +1,10 @@ """ Utility methods used by LLMs """ +import re import json from pathlib import Path -from typing import Optional, Tuple +from typing import Any, Optional, Tuple import torch import torch.nn as nn @@ -11,7 +12,18 @@ import torchtune.modules as ttm import math +from safetensors.torch import load_file as load_safetensors + +from transformers import ( + LogitsProcessorList, + TopKLogitsWarper, + TopPLogitsWarper, + TemperatureLogitsWarper +) +from transformers.cache_utils import Cache, DynamicCache + from exo.helpers import DEBUG +from exo.inference.shard import Shard def load_model_config(model_config_path: Path) -> dict: """ @@ -28,65 +40,127 @@ def load_model_config(model_config_path: Path) -> dict: model_config = json.load(f) return model_config -def select_next_token( - logits, - top_k=0, - top_p=0.0, - temperature=1.0, - use_max=False, -): +def check_weights(model, state_dict): """ - Selects the next token 
from logits using top-k, top-p, and temperature scaling. - - Args: - logits (torch.Tensor): Logits or prediction scores tensor of shape (batch_size, vocab_size). - top_k (int): Number of top logits to consider for sampling. - top_p (float): Cumulative probability threshold for nucleus sampling. - temperature (float): Scaling factor for temperature. - use_max (bool): Whether to use argmax for next token selection. - debug (bool): If True, prints debugging information. - - Returns: - next_token (torch.Tensor): The next token selected (batch_size,). + Verifies that the weights from the state dictionary are properly loaded into the model. """ - # Get logits for the last token in the sequence - logits = logits[:, -1, :].clone().float() - - # Apply temperature scaling - if temperature != 1.0: - logits = logits / temperature - - # Apply top-k filtering - if top_k > 0: - top_k = min(top_k, logits.size(-1)) - min_topk = torch.topk(logits, top_k)[0][..., -1, None] - logits = logits.masked_fill(logits < min_topk, float("-inf")) + model_state_dict = model.state_dict() + for name, param in model_state_dict.items(): + if name in state_dict: + print(f"\nchecking {name}\n") + loaded_param = state_dict[name] + if param.shape != loaded_param.shape: + print(f"Shape mismatch for {name}: expected {param.shape}, got {loaded_param.shape}") + else: + print(f"{name}: loaded correctly") + + for name in state_dict: + if name not in model_state_dict: + print(f"Unexpected weight {name} found in state_dict") + +def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): + """ + Loads weights from huggingface and changes it to match torchtune naming structure + """ + # Load weights from safetensors files in the cache directory + safetensors_files = list(cache_dir.glob("*.safetensors")) + if not safetensors_files: + raise FileNotFoundError("No safetensors files found in the cache directory.") + + # Load weights from each found safetensors file + stitch_lmhead = True + for safetensor_file in safetensors_files: + state_dict = load_safetensors(safetensor_file) + + # remap to work with our model + remapped_state_dict = {} + tied_embed_weight = None + for key, value in state_dict.items(): + # load layer by shard + lnrgx = re.findall(r'model\.layers\.(\d+).*', key) + layer_num = int(lnrgx[0]) if len(lnrgx) > 0 else None + shard_layer_range = list(range(shard.start_layer, shard.end_layer)) + if layer_num in shard_layer_range: + # change input layer norm to sa_norm for torchtune + re_iln = re.findall( + rf'model.layers\.{layer_num}\.(input_layernorm)\.weight', key) + if len(re_iln) != 0: + key = f"model.layers.{layer_num}.sa_norm.weight" + + # change post attention layernorm to mlp_norm for torchtune + re_pal = re.findall( + rf'model.layers\.{layer_num}\.(post_attention_layernorm)\.weight', key) + if len(re_pal) != 0: + key = f"model.layers.{layer_num}.mlp_norm.weight" + + # change o_proj to output_proj + re_o_proj = re.findall(rf'model\.layers\.{layer_num}.(\w+)\.o_proj\.weight', key) + if len(re_o_proj) != 0: + key = f"model.layers.{layer_num}.{re_o_proj[0]}.output_proj.weight" + + # change self_attn to attn + re_attn = re.findall(rf'model\.layers\.{layer_num}.(\w+)\.(\w+)\.(\w+)', key) + if len(re_attn) != 0 and re_attn[0][0] == "self_attn": + key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" + + # saving embed for tied weights + elif key == 'model.embed_tokens.weight': + tied_embed_weight = value + # change name for torchtune + key = 'model.tok_embeddings.weight' + + elif key == 
'lm_head.weight': + stitch_lmhead = False + # change key for torchtune + key = 'model.output.weight' + + elif key == 'model.norm.weight': + key = 'model.norm.weight' + + remapped_state_dict[key] = value + + if stitch_lmhead: + remapped_state_dict['model.output.weight'] = tied_embed_weight + + model.load_state_dict(remapped_state_dict, strict=False) + + #if DEBUG >= 7: + print("\n--- checking weights ----\n") + check_weights(model, remapped_state_dict) + +def hf_logit_sample( + logits, + input_ids, + use_max: bool=False, + top_k: int=0, + top_p: float=0.9, + temp: float=1.0, +) -> torch.Tensor: + """ + Logit sampling using transformers + """ + logits_processor = LogitsProcessorList([ + TopKLogitsWarper(top_k), + TemperatureLogitsWarper(temp), + TopPLogitsWarper(top_p) + ]) - # Apply top-p (nucleus) filtering - if top_p > 0.0: - sorted_logits, sorted_indices = torch.sort(logits, descending=False) - cumulative_probs = sorted_logits.softmax(dim=-1).cumsum(dim=-1) - sorted_indices_to_remove = cumulative_probs <= (1 - top_p) - sorted_indices_to_remove[..., -1:] = 0 + # get a single cloned logit + logits = logits[:, -1, :].clone().float() - indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove) - logits = logits.masked_fill(indices_to_remove, float('-inf')) + next_token_scores = logits_processor(input_ids, logits) - # Select next token if not use_max: - probs = F.softmax(logits, dim=-1) + probs = nn.functional.softmax(next_token_scores, dim=-1) next_token = torch.multinomial(probs, num_samples=1) else: - next_token = torch.argmax(logits, dim=-1, keepdim=True) + next_token = torch.argmax(next_token_scores, dim=-1) - next_token = next_token[:, None].squeeze(-1) - - # Debugging output if DEBUG >= 4: - print(f"Logits: {logits}") - print(f"Next token: {next_token}") + print(f"input_ids: {input_ids}") + print(f"next_token: {next_token}") - return next_token + return next_token[:, None].squeeze(-1) def create_4d_causal_attention_mask( attention_mask: torch.Tensor, @@ -98,7 +172,7 @@ def create_4d_causal_attention_mask( batch_size: int, ) -> torch.Tensor: """ - Creates a 4D causal attention mask from a 2D mask, with adjustments for static caching. 
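(Aside, not part of the patch: the transformers warpers chained by the hf_logit_sample helper added above can be exercised on their own. A minimal sketch using the same classes and the TOP_K/TEMP/TOP_P values from the test file; the toy prompt ids and vocab size are assumptions for illustration.)

import torch
from transformers import (
  LogitsProcessorList,
  TopKLogitsWarper,
  TopPLogitsWarper,
  TemperatureLogitsWarper
)

# Same chain hf_logit_sample builds: top-k, then temperature, then top-p.
warpers = LogitsProcessorList([
  TopKLogitsWarper(35),
  TemperatureLogitsWarper(0.6),
  TopPLogitsWarper(0.9)
])

input_ids = torch.tensor([[1, 2, 3]])   # toy prompt ids
last_logits = torch.randn(1, 128)       # toy vocab of 128

scores = warpers(input_ids, last_logits)              # filtered / rescaled scores
probs = torch.nn.functional.softmax(scores, dim=-1)
next_token = torch.multinomial(probs, num_samples=1)  # sampled next token id
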
+ Creates a 4D causal attention mask from a 2D mask Args: attention_mask (torch.Tensor): @@ -142,17 +216,16 @@ def create_4d_causal_attention_mask( # Expand to 4D and batch size causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1) - if attention_mask is not None: - # Create a padding mask based on the input attention_mask - mask_len = attention_mask.shape[-1] - causal_mask = causal_mask.clone() # Ensure contiguous memory for in-place operations - padding_mask = causal_mask[:, :, :, :mask_len] + attention_mask[:, None, None, :] - padding_mask = padding_mask == 0 + # Create a padding mask based on the input attention_mask + mask_len = attention_mask.shape[-1] + causal_mask = causal_mask.clone() # Ensure contiguous memory for in-place operations + padding_mask = causal_mask[:, :, :, :mask_len] + attention_mask[:, None, None, :] + padding_mask = padding_mask == 0 - # Apply padding to the causal mask - causal_mask[:, :, :, :mask_len] = causal_mask[:, :, :, :mask_len].masked_fill( - padding_mask, min_value - ) + # Apply padding to the causal mask + causal_mask[:, :, :, :mask_len] = causal_mask[:, :, :, :mask_len].masked_fill( + padding_mask, min_value + ) return causal_mask @@ -278,18 +351,15 @@ def __init__(self, hidden_size, eps=1e-6): """ super().__init__() self.weight = nn.Parameter(torch.ones(hidden_size)) - self.variance_epsilon = eps + self.eps = eps def forward(self, hidden_states): input_dtype = hidden_states.dtype hidden_states = hidden_states.to(torch.float32) variance = hidden_states.pow(2).mean(-1, keepdim=True) - hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) + hidden_states = hidden_states * torch.rsqrt(variance + self.eps) return self.weight * hidden_states.to(input_dtype) - def extra_repr(self): - return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}" - # ------------------ # Attention Methods # ------------------ diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index b5f07d008..32e8fa1d0 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -1,112 +1,148 @@ """ Test of pytorch based llama3 model """ -import re from pathlib import Path import torch -import torchtune.generation as ttg from transformers import AutoTokenizer from huggingface_hub import snapshot_download -from safetensors.torch import load_file as load_safetensors -from exo.inference.torch.models.llm_utils import load_model_config, select_next_token -from exo.inference.torch.models.llama3 import LlamaModel + +import torchtune.generation as ttg +from torchtune.models import llama3 +from torchtune.data import Message + +from exo.inference.torch.models.llm_utils import ( + load_model_config, + hf_logit_sample, + load_model_weights_torchtune, + create_4d_causal_attention_mask +) +from exo.inference.torch.models.llama3 import ShardedLlamaModel from exo.inference.shard import Shard -MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct" -TEMP=0.7 + +MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" +TEMP=0.6 TOP_K=35 TOP_P=0.9 +MAX_SEQ_LEN=2048 -def check_weights(model, state_dict): - """ - Verifies that the weights from the state dictionary are properly loaded into the model. 
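(Aside, not part of the patch: the checkpoint-key handling this series keeps iterating on, here and in load_model_weights_torchtune, boils down to rewriting Hugging Face safetensors names into torchtune module names. A condensed sketch of those rename rules, regex-based like the real code and illustrative only.)

import re

# HF checkpoint key -> torchtune key, mirroring load_model_weights_torchtune.
RENAMES = [
  (r"model\.layers\.(\d+)\.input_layernorm\.weight", r"model.layers.\1.sa_norm.weight"),
  (r"model\.layers\.(\d+)\.post_attention_layernorm\.weight", r"model.layers.\1.mlp_norm.weight"),
  (r"model\.layers\.(\d+)\.self_attn\.o_proj\.weight", r"model.layers.\1.attn.output_proj.weight"),
  (r"model\.layers\.(\d+)\.self_attn\.(\w+)\.weight", r"model.layers.\1.attn.\2.weight"),
  (r"model\.embed_tokens\.weight", r"model.tok_embeddings.weight"),
  (r"lm_head\.weight", r"model.output.weight"),
]

def remap_key(key: str) -> str:
  for pattern, repl in RENAMES:
    if re.fullmatch(pattern, key):
      return re.sub(pattern, repl, key)
  return key  # e.g. model.norm.weight keeps its name

print(remap_key("model.layers.0.self_attn.q_proj.weight"))
# -> model.layers.0.attn.q_proj.weight
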
- """ - model_state_dict = model.state_dict() - print(f"model_state_dict: {model_state_dict.keys()}") - for name, param in model_state_dict.items(): - if name in state_dict: - loaded_param = state_dict[name] - if param.shape != loaded_param.shape: - print(f"Shape mismatch for {name}: expected {param.shape}, got {loaded_param.shape}") - else: - print(f"{name}: loaded correctly") - #else: - # print(f"{name} not found in the state_dict") - - for name in state_dict: - if name not in model_state_dict: - print(f"Unexpected weight {name} found in state_dict") - -def test_generation(model, tokenizer, text, max_length=10, config=None): +def test_generation(text, max_length=10, config=None): """ Test the generation capabilities of the LlamaModel with sample text. """ # Tokenize input text - prompt = tokenizer.apply_chat_template([ - { - "role": "system", - "content": "You are a helpful assistant." - }, - { - "role": "user", - "content": text - } - ], tokenize=False, add_generation_prompt=True) - - print(f"prompt: {prompt}") + messages = [] + messages.extend( + [ + Message(role="user", content=text), + # Empty assistant message to kick-start generation + Message(role="assistant", content=""), + ] + ) - inputs = tokenizer(prompt, return_tensors="pt") - input_ids = inputs.get("input_ids") - attention_mask = inputs.get("attention_mask") + tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) + print(f"tokenizer_out: {tokenizer_out}") + tokens = tokenizer_out["tokens"] + prompt = torch.tensor(tokens, dtype=torch.int) + + if prompt.ndim == 1: + prompt = prompt.view(1, -1) + + bsz, prompt_length = prompt.size() + total_response_length = prompt_length + MAX_SEQ_LEN + generated_tokens = prompt.clone() + resp_max_seq_len = ( + total_response_length + if not shard_model.model.caches_are_enabled() + else shard_model.model.decoder_max_cache_seq_len + ) - print(f"input_ids: {input_ids}") - print(f"attention_mask: {attention_mask}") + # masking for proper attention + padding_masks = prompt != llama_tokenizer.pad_id + if not padding_masks.all(): + padding_masks = torch.nn.functional.pad( + padding_masks, + (0, MAX_SEQ_LEN), + value=True + ) + + masks = ttg.get_causal_mask_from_padding_mask( + padding_masks, + target_seq_len=resp_max_seq_len + ) + + input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) + else: + masks = torch.tril( + torch.ones( + total_response_length, + resp_max_seq_len if resp_max_seq_len is not None else MAX_SEQ_LEN, + dtype=torch.bool, + device=prompt.device, + ) + ).unsqueeze(0) + + input_pos = torch.arange( + 0, total_response_length, device=prompt.device + ).unsqueeze(0) + + if shard_model.model.caches_are_enabled(): + curr_masks = masks[:, :prompt_length] + else: + curr_masks = masks[:, :prompt_length, :prompt_length] + + print(f"padding_masks: {padding_masks.shape}") + print(padding_masks.all()) + + next_token, gen_logits = ttg.generate_next_token( + shard_model.model, + input_pos=input_pos[:, :prompt_length].squeeze(), + x=prompt, + mask=curr_masks, + q=torch.empty( + ( + prompt.size(0), + shard_model.model.tok_embeddings.num_embeddings + ), device=prompt.device + ).exponential_(1, generator=None) + ) - # Start with initial input_ids - generated_ids = input_ids.clone() + print(f"next_token: {next_token}") - # Generate tokens step-by-step - print(f"{model}") + generated_tokens = torch.cat([generated_tokens, next_token], dim=-1) - for _ in range(max_length): - with torch.no_grad(): - pred_score, hstates = model( - generated_ids, - attention_mask=attention_mask 
- ) + print(f"generated_tokens: {generated_tokens}") - print("\n\n------------------------------------------------------") - print(f"pred_score: {pred_score.shape}") - print(f"hstates: {hstates.shape if hstates is not None else None}") - # Select next token using pred_score - next_token = select_next_token(pred_score, top_k=TOP_K, top_p=TOP_P, temperature=TEMP, use_max=False) - #next_token = ttg.sample(pred_score, temperature=TEMP, top_k=TOP_K)[:, -1, :] - print(f"next_token: {next_token}") + curr_pos = prompt_length - # Update generated_ids - generated_ids = torch.cat([generated_ids, next_token], dim=1) - print(f"generated_ids: {generated_ids.shape}") + # stop tokens logic + stop_tokens = None + stop_token_reached = torch.zeros(bsz, dtype=torch.bool, device=prompt.device) + stop_tokens = ( + torch.tensor(stop_tokens, device=prompt.device, dtype=tokens.dtype) + if stop_tokens + else None + ) + stop_token_mask = torch.ones( + (bsz, prompt_length + 1), dtype=torch.int32, device=prompt.device + ) - # Update attention mask - #attention_mask = torch.cat([attention_mask, torch.ones((attention_mask.size(0), 1), device=attention_mask.device)], dim=1) - print(f"attention_mask: {attention_mask.shape}") + # finish writing stop token logic using torchtune generation + # ref https://github.com/pytorch/torchtune/blob/main/torchtune/generation/_generation.py#L337 - # Check for EOS token - print(f"next_token.item(): {next_token.item()}") + for _ in range(max_length): - if config: - if next_token.item() in config["eos_token_id"]: - break + if shard_model.model.caches_are_enabled(): + curr_input_pos = input_pos[:, curr_pos] + curr_masks = masks[:, curr_pos, None, :] else: - if next_token.item() == tokenizer.eos_token_id: - break + tokens = generated_tokens.clone() + curr_input_pos = input_pos[:, : curr_pos + 1] + curr_masks = masks[:, : curr_pos + 1, : curr_pos + 1] - # Decode generated text - generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True) - print(f"\n\n\n\nGenerated Response: {generated_text}") - - print("\n\n------------------------------------------------------") + generated_tokens = generated_tokens.tolist() + print(f"resp: {llama_tokenizer.decode(generated_tokens[0])}") if __name__ == "__main__": print("\nTesting generation:") @@ -123,64 +159,25 @@ def test_generation(model, tokenizer, text, max_length=10, config=None): shard = Shard( model_id=MODEL_NAME, start_layer=0, - end_layer=int(config["num_hidden_layers"]) - 1, + end_layer=int(config["num_hidden_layers"]), n_layers=int(config["num_hidden_layers"]) ) # Initialize tokenizer - tokenizer = AutoTokenizer.from_pretrained(shard.model_id) + llama_tokenizer_path = f"{cache_dir}/original/tokenizer.model" + llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) + #tokenizer = AutoTokenizer.from_pretrained( + # MODEL_NAME, + # add_eos_token=True + #) # Initialize LlamaModel with config and tokenizer - model = LlamaModel(config, shard) - print(f"\nmodel: {model}") - - # Load weights from safetensors files in the cache directory - safetensors_files = list(cache_dir.glob("*.safetensors")) - if not safetensors_files: - raise FileNotFoundError("No safetensors files found in the cache directory.") - - # Load weights from each found safetensors file - for safetensor_file in safetensors_files: - print(f"Loading weights from: {safetensor_file}") - state_dict = load_safetensors(safetensor_file) - - # remap to work with our model - remapped_state_dict = {} - tied_embed_weight = None - for key, value in state_dict.items(): 
- # Remove the 'model.' prefix if it exists - print(f"remapping: {key}") - if key.startswith('model.'): - new_key = key[len('model.'):] # Remove 'model.' - else: - new_key = key - - # change o_proj to output_proj - re_o_proj = re.findall(r'layers.(\d+).(\w+).(o_proj).(\w+)', new_key) - if len(re_o_proj) != 0: - new_key = f"layers.{re_o_proj[0][0]}.{re_o_proj[0][1]}.output_proj.weight" - - remapped_state_dict[new_key] = value - - # saving embed for tied weights - if new_key == 'embed_tokens.weight': - tied_embed_weight = value - - if new_key == 'lm_head.weight': - model.has_lm_head_weight = True - - if not model.has_lm_head_weight: - remapped_state_dict['lm_head.weight'] = tied_embed_weight - - model.load_state_dict(remapped_state_dict, strict=False) - - check_weights(model, remapped_state_dict) - - #exit() - model.eval() # Set the model to evaluation mode + shard_model = ShardedLlamaModel(config, shard) + print(f"\nshard_model: {shard_model}") + load_model_weights_torchtune(cache_dir, shard, shard_model) # Sample text for testing test_text = "Hello" - test_generation(model, tokenizer, test_text, 5, config) + test_generation(test_text, 5, config) From f53ebd17646e95fcf5f8c2d0e807c26300b958fe Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 3 Nov 2024 10:12:59 -0900 Subject: [PATCH 474/589] cleaning up custom dot product attention but might be removed, building out next parts for distributed inference --- exo/inference/torch/models/llama3.py | 28 ++++++++++------ exo/inference/torch/models/llm_utils.py | 44 +++++++++++++------------ 2 files changed, 41 insertions(+), 31 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 2feb5e189..7f41d1290 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -3,7 +3,7 @@ Written with pytorch using torchtune and other methods """ -from typing import Tuple, List +from typing import Optional import torch import torch.nn as nn @@ -105,8 +105,6 @@ def LlamaModel( """ LlamaModel using torchtune """ - print(shard) - # Load configurations from config rope_scaling = config.get("rope_scaling") hidden_head_dim = config["hidden_size"] // config["num_attention_heads"] @@ -185,19 +183,29 @@ def LlamaModel( ) class ShardedLlamaModel(nn.Module): - def __init__(self, config: dict, shard: Shard, is_causal=True): + def __init__(self, + config: dict, + shard: Shard, + device: torch.device=torch.device("cpu"), + hidden_states: Optional[torch.Tensor] = None, + is_causal=True + ): super(ShardedLlamaModel, self).__init__() self.shard = shard self.config = config self.model = LlamaModel(config, shard, is_causal) + self.device = device - def generate( - self, - prompt: torch.Tensor - ): + def generate(self, prompt: torch.Tensor): """ - move login being done in test_llama3_model for generation to here - along with test sharding + move logit generation being done in test_llama3_model for generation to here + along with sharding """ + self.model.output_hidden_states = list(range(shard.start_layer, shard.end_layer)) + + # pass hidden state to model until last layer + # can be done with model's encoder_input and encoder_mask + # on last layer can generate + pass diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 0946890ed..df345968b 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -24,6 +24,7 @@ from exo.helpers import DEBUG from exo.inference.shard import Shard +from 
exo.inference.torch.tests.test_llama3_model import MAX_SEQ_LEN def load_model_config(model_config_path: Path) -> dict: """ @@ -68,13 +69,13 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): raise FileNotFoundError("No safetensors files found in the cache directory.") # Load weights from each found safetensors file - stitch_lmhead = True + paried_lmhead = True for safetensor_file in safetensors_files: state_dict = load_safetensors(safetensor_file) # remap to work with our model remapped_state_dict = {} - tied_embed_weight = None + paried_embed_weight = None for key, value in state_dict.items(): # load layer by shard lnrgx = re.findall(r'model\.layers\.(\d+).*', key) @@ -103,14 +104,14 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): if len(re_attn) != 0 and re_attn[0][0] == "self_attn": key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" - # saving embed for tied weights + # saving embed for paired weights elif key == 'model.embed_tokens.weight': - tied_embed_weight = value + paried_embed_weight = value # change name for torchtune key = 'model.tok_embeddings.weight' elif key == 'lm_head.weight': - stitch_lmhead = False + paried_lmhead = False # change key for torchtune key = 'model.output.weight' @@ -119,8 +120,8 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): remapped_state_dict[key] = value - if stitch_lmhead: - remapped_state_dict['model.output.weight'] = tied_embed_weight + if paried_lmhead: + remapped_state_dict['model.output.weight'] = paried_embed_weight model.load_state_dict(remapped_state_dict, strict=False) @@ -529,7 +530,8 @@ def __init__( rotary_emb, attention_dropout=0.0, is_causal=True, - attention_bias=False + attention_bias=False, + kv_max_seq_len=2048 ): super().__init__() self.hidden_size = hidden_size @@ -538,6 +540,7 @@ def __init__( self.head_dim = head_dim self.attention_dropout = attention_dropout self.is_causal = is_causal + self.kv_max_seq_len = kv_max_seq_len # nn layers self.q_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=attention_bias) @@ -593,20 +596,19 @@ def forward( print(f"key_states: {key_states.shape}") print(f"value_states: {value_states.shape}") - # Forcing caching always enabled + # Caching if kv_cache is not None: - #print(f"kv_cache.size {kv_cache.size}") - #print(f"key_states.size(0) {key_states.size(2)}") - - #if kv_cache.size != key_states.size(2): - # print(f"\n MAKE NEW KVCACHE batch_size={key_states.size(0)} max_seq_len={key_states.size(2)}") - # kv_cache = ttm.KVCache( - # batch_size=key_states.size(0), - # max_seq_len=key_states.size(2), - # num_heads=self.num_kv_heads, - # head_dim=self.head_dim, - # dtype=hidden_states.dtype - # ) + if kv_cache.size >= self.max_seq_len: + # double the cache each time space is ran out + self.kv_max_seq_len = self.kv_max_seq_len + self.kv_max_seq_len + + kv_cache = ttm.KVCache( + batch_size=key_states.size(0), + max_seq_len=self.kv_max_seq_len, + num_heads=self.num_kv_heads, + head_dim=self.head_dim, + dtype=hidden_states.dtype + ) key_states, value_states = kv_cache.update(key_states, value_states) From e8db8eee975b5773f007e908ccc9a3541a99ae51 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 10 Nov 2024 13:29:52 -0900 Subject: [PATCH 475/589] first layer run fixes, variable layer length weight loading fixes, working on split modeling --- exo/inference/torch/models/llama3.py | 85 +++++++- exo/inference/torch/models/llm_utils.py | 34 +-- .../torch/tests/test_llama3_model.py | 195 
+++++++++--------- 3 files changed, 195 insertions(+), 119 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 7f41d1290..a8769edfd 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -3,7 +3,7 @@ Written with pytorch using torchtune and other methods """ -from typing import Optional +from typing import Optional, Any, Tuple, List import torch import torch.nn as nn @@ -186,26 +186,91 @@ class ShardedLlamaModel(nn.Module): def __init__(self, config: dict, shard: Shard, + tokenizer: Any, device: torch.device=torch.device("cpu"), hidden_states: Optional[torch.Tensor] = None, is_causal=True ): super(ShardedLlamaModel, self).__init__() + self.tokenizer = tokenizer self.shard = shard self.config = config self.model = LlamaModel(config, shard, is_causal) self.device = device - def generate(self, prompt: torch.Tensor): + def generate( + self, + input_tensor: torch.Tensor, + max_seq_len: int=4096 + ) -> Tuple[Optional[List[torch.Tensor]], Optional[torch.Tensor]]: """ - move logit generation being done in test_llama3_model for generation to here - along with sharding + Generate logits and/or hidden_states from llama model + + Args + input (torch.Tensor) - tokens if initial first layer input and hidden states after + max_seq_len (int) - Max sequence length of generation, default 4096 """ - self.model.output_hidden_states = list(range(shard.start_layer, shard.end_layer)) - - # pass hidden state to model until last layer - # can be done with model's encoder_input and encoder_mask - # on last layer can generate + self.model.output_hidden_states = list(range(self.shard.start_layer, self.shard.end_layer)) + + if self.shard.is_first_layer(): + tokens = input_tensor + + if tokens.ndim == 1: + tokens = tokens.view(1, -1) + + _, tokens_length = tokens.size() + total_response_length = tokens_length + max_seq_len + resp_max_seq_len = ( + total_response_length + if not self.model.caches_are_enabled() + else self.model.decoder_max_cache_seq_len + ) + + # masking for proper attention + padding_masks = tokens != self.tokenizer.pad_id + if not padding_masks.all(): + padding_masks = torch.nn.functional.pad( + padding_masks, + (0, max_seq_len), + value=True + ) + + masks = ttg.get_causal_mask_from_padding_mask( + padding_masks, + target_seq_len=resp_max_seq_len + ) + + input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) + else: + masks = torch.tril( + torch.ones( + total_response_length, + resp_max_seq_len if resp_max_seq_len is not None else max_seq_len, + dtype=torch.bool, + device=tokens.device, + ) + ).unsqueeze(0) + + input_pos = torch.arange( + 0, total_response_length, device=tokens.device + ).unsqueeze(0) + + if self.model.caches_are_enabled(): + curr_masks = masks[:, :tokens_length] + else: + curr_masks = masks[:, :tokens_length, :tokens_length] + + model_output = self.model( + tokens=tokens, + mask=curr_masks, + input_pos=input_pos[:, :tokens_length].squeeze() + ) + + model_logits = model_output[-1] + model_output.pop() # remove logits + model_hs = model_output # hidden states - pass + return model_hs, model_logits + else: + return None, None diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index df345968b..8e4ec14d3 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -24,7 +24,6 @@ from exo.helpers import DEBUG from exo.inference.shard import Shard -from exo.inference.torch.tests.test_llama3_model 
import MAX_SEQ_LEN def load_model_config(model_config_path: Path) -> dict: """ @@ -70,6 +69,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # Load weights from each found safetensors file paried_lmhead = True + shard_layer_range = list(range(shard.start_layer, shard.end_layer)) for safetensor_file in safetensors_files: state_dict = load_safetensors(safetensor_file) @@ -80,45 +80,44 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # load layer by shard lnrgx = re.findall(r'model\.layers\.(\d+).*', key) layer_num = int(lnrgx[0]) if len(lnrgx) > 0 else None - shard_layer_range = list(range(shard.start_layer, shard.end_layer)) if layer_num in shard_layer_range: # change input layer norm to sa_norm for torchtune re_iln = re.findall( rf'model.layers\.{layer_num}\.(input_layernorm)\.weight', key) if len(re_iln) != 0: - key = f"model.layers.{layer_num}.sa_norm.weight" + remapped_state_dict[f"model.layers.{layer_num}.sa_norm.weight"] = value # change post attention layernorm to mlp_norm for torchtune re_pal = re.findall( rf'model.layers\.{layer_num}\.(post_attention_layernorm)\.weight', key) if len(re_pal) != 0: - key = f"model.layers.{layer_num}.mlp_norm.weight" - - # change o_proj to output_proj - re_o_proj = re.findall(rf'model\.layers\.{layer_num}.(\w+)\.o_proj\.weight', key) - if len(re_o_proj) != 0: - key = f"model.layers.{layer_num}.{re_o_proj[0]}.output_proj.weight" + remapped_state_dict[f"model.layers.{layer_num}.mlp_norm.weight"] = value # change self_attn to attn + # along with changing o_proj to output_proj re_attn = re.findall(rf'model\.layers\.{layer_num}.(\w+)\.(\w+)\.(\w+)', key) if len(re_attn) != 0 and re_attn[0][0] == "self_attn": - key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" + if re_attn[0][1] == "o_proj": + remapped_state_dict[f"model.layers.{layer_num}.attn.output_proj.weight"] = value + else: + remapped_state_dict[f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}"] = value # saving embed for paired weights elif key == 'model.embed_tokens.weight': paried_embed_weight = value # change name for torchtune - key = 'model.tok_embeddings.weight' + remapped_state_dict['model.tok_embeddings.weight'] = value elif key == 'lm_head.weight': paried_lmhead = False - # change key for torchtune - key = 'model.output.weight' - - elif key == 'model.norm.weight': - key = 'model.norm.weight' - remapped_state_dict[key] = value + # get everything else except layers, embed_tokens and lm_head + if ( + len(re.findall(r'model\.layers\..*', key)) == 0 + and key != "model.embed_tokens.weight" + and key != "lm_head.weight" + ): + remapped_state_dict[key] = value if paried_lmhead: remapped_state_dict['model.output.weight'] = paried_embed_weight @@ -127,6 +126,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): #if DEBUG >= 7: print("\n--- checking weights ----\n") + print(f"\nremapped_state_dict: {remapped_state_dict.keys()}\n") check_weights(model, remapped_state_dict) def hf_logit_sample( diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index 32e8fa1d0..71fcbfd87 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -11,14 +11,14 @@ from torchtune.models import llama3 from torchtune.data import Message + +from exo.inference.torch.models.llama3 import ShardedLlamaModel +from exo.inference.shard import Shard + from exo.inference.torch.models.llm_utils 
import ( load_model_config, - hf_logit_sample, load_model_weights_torchtune, - create_4d_causal_attention_mask ) -from exo.inference.torch.models.llama3 import ShardedLlamaModel -from exo.inference.shard import Shard MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" @@ -46,103 +46,114 @@ def test_generation(text, max_length=10, config=None): tokens = tokenizer_out["tokens"] prompt = torch.tensor(tokens, dtype=torch.int) - if prompt.ndim == 1: - prompt = prompt.view(1, -1) + hidden_states, logits = shard_model.generate(prompt) - bsz, prompt_length = prompt.size() - total_response_length = prompt_length + MAX_SEQ_LEN - generated_tokens = prompt.clone() - resp_max_seq_len = ( - total_response_length - if not shard_model.model.caches_are_enabled() - else shard_model.model.decoder_max_cache_seq_len - ) + if hidden_states is not None: + print(f"hidden_states: {hidden_states[0].shape}\n{hidden_states}") - # masking for proper attention - padding_masks = prompt != llama_tokenizer.pad_id - if not padding_masks.all(): - padding_masks = torch.nn.functional.pad( - padding_masks, - (0, MAX_SEQ_LEN), - value=True - ) - - masks = ttg.get_causal_mask_from_padding_mask( - padding_masks, - target_seq_len=resp_max_seq_len - ) - - input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) - else: - masks = torch.tril( - torch.ones( - total_response_length, - resp_max_seq_len if resp_max_seq_len is not None else MAX_SEQ_LEN, - dtype=torch.bool, - device=prompt.device, - ) - ).unsqueeze(0) - - input_pos = torch.arange( - 0, total_response_length, device=prompt.device - ).unsqueeze(0) - - if shard_model.model.caches_are_enabled(): - curr_masks = masks[:, :prompt_length] - else: - curr_masks = masks[:, :prompt_length, :prompt_length] - - print(f"padding_masks: {padding_masks.shape}") - print(padding_masks.all()) - - next_token, gen_logits = ttg.generate_next_token( - shard_model.model, - input_pos=input_pos[:, :prompt_length].squeeze(), - x=prompt, - mask=curr_masks, - q=torch.empty( - ( - prompt.size(0), - shard_model.model.tok_embeddings.num_embeddings - ), device=prompt.device - ).exponential_(1, generator=None) - ) + if logits is not None: + print(f"logits: {logits.shape}\n{logits}") + #if prompt.ndim == 1: + # prompt = prompt.view(1, -1) - print(f"next_token: {next_token}") + #bsz, prompt_length = prompt.size() + #total_response_length = prompt_length + MAX_SEQ_LEN + #generated_tokens = prompt.clone() + #resp_max_seq_len = ( + # total_response_length + # if not shard_model.model.caches_are_enabled() + # else shard_model.model.decoder_max_cache_seq_len + #) - generated_tokens = torch.cat([generated_tokens, next_token], dim=-1) + ## masking for proper attention + #padding_masks = prompt != llama_tokenizer.pad_id + #if not padding_masks.all(): + # padding_masks = torch.nn.functional.pad( + # padding_masks, + # (0, MAX_SEQ_LEN), + # value=True + # ) + + # masks = ttg.get_causal_mask_from_padding_mask( + # padding_masks, + # target_seq_len=resp_max_seq_len + # ) + + # input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) + #else: + # masks = torch.tril( + # torch.ones( + # total_response_length, + # resp_max_seq_len if resp_max_seq_len is not None else MAX_SEQ_LEN, + # dtype=torch.bool, + # device=prompt.device, + # ) + # ).unsqueeze(0) + + # input_pos = torch.arange( + # 0, total_response_length, device=prompt.device + # ).unsqueeze(0) + + #if shard_model.model.caches_are_enabled(): + # curr_masks = masks[:, :prompt_length] + #else: + # curr_masks = masks[:, :prompt_length, :prompt_length] + + 
#rand_sample = torch.empty( + # ( + # prompt.size(0), + # self.model.tok_embeddings.num_embeddings + # ), device=prompt.device + #).exponential_(1, generator=None) + + #print(f"padding_masks: {padding_masks.shape}") + #print(padding_masks.all()) + + ## this can be sepearted out for dist inference + ## see https://github.com/pytorch/torchtune/blob/bc4acc19ffab2366a14468c97294992dbb7c50d1/torchtune/generation/_generation.py#L66 + #next_token, gen_logits = ttg.generate_next_token( + # shard_model.model, + # input_pos=input_pos[:, :prompt_length].squeeze(), + # x=prompt, + # mask=curr_masks, + # q=rand_sample + #) - print(f"generated_tokens: {generated_tokens}") + #print(f"next_token: {next_token}") - curr_pos = prompt_length + #generated_tokens = torch.cat([generated_tokens, next_token], dim=-1) - # stop tokens logic - stop_tokens = None - stop_token_reached = torch.zeros(bsz, dtype=torch.bool, device=prompt.device) - stop_tokens = ( - torch.tensor(stop_tokens, device=prompt.device, dtype=tokens.dtype) - if stop_tokens - else None - ) - stop_token_mask = torch.ones( - (bsz, prompt_length + 1), dtype=torch.int32, device=prompt.device - ) + #print(f"generated_tokens: {generated_tokens}") + + #curr_pos = prompt_length + + ## stop tokens logic + #stop_tokens = None + #stop_token_reached = torch.zeros(bsz, dtype=torch.bool, device=prompt.device) + #stop_tokens = ( + # torch.tensor(stop_tokens, device=prompt.device, dtype=tokens.dtype) + # if stop_tokens + # else None + #) + #stop_token_mask = torch.ones( + # (bsz, prompt_length + 1), dtype=torch.int32, device=prompt.device + #) - # finish writing stop token logic using torchtune generation - # ref https://github.com/pytorch/torchtune/blob/main/torchtune/generation/_generation.py#L337 + ## finish writing stop token logic using torchtune generation + ## ref https://github.com/pytorch/torchtune/blob/main/torchtune/generation/_generation.py#L337 - for _ in range(max_length): + #for _ in range(max_length): - if shard_model.model.caches_are_enabled(): - curr_input_pos = input_pos[:, curr_pos] - curr_masks = masks[:, curr_pos, None, :] - else: - tokens = generated_tokens.clone() - curr_input_pos = input_pos[:, : curr_pos + 1] - curr_masks = masks[:, : curr_pos + 1, : curr_pos + 1] + # if shard_model.model.caches_are_enabled(): + # curr_input_pos = input_pos[:, curr_pos] + # curr_masks = masks[:, curr_pos, None, :] + # else: + # tokens = generated_tokens.clone() + # curr_input_pos = input_pos[:, : curr_pos + 1] + # curr_masks = masks[:, : curr_pos + 1, : curr_pos + 1] - generated_tokens = generated_tokens.tolist() - print(f"resp: {llama_tokenizer.decode(generated_tokens[0])}") + #generated_tokens = generated_tokens.tolist() + #print(f"resp: {llama_tokenizer.decode(generated_tokens[0])}") if __name__ == "__main__": print("\nTesting generation:") @@ -159,7 +170,7 @@ def test_generation(text, max_length=10, config=None): shard = Shard( model_id=MODEL_NAME, start_layer=0, - end_layer=int(config["num_hidden_layers"]), + end_layer=4,#int(config["num_hidden_layers"]), n_layers=int(config["num_hidden_layers"]) ) @@ -172,7 +183,7 @@ def test_generation(text, max_length=10, config=None): #) # Initialize LlamaModel with config and tokenizer - shard_model = ShardedLlamaModel(config, shard) + shard_model = ShardedLlamaModel(config, shard, llama_tokenizer) print(f"\nshard_model: {shard_model}") load_model_weights_torchtune(cache_dir, shard, shard_model) From 22bc6a78d2ee5c859533f2b60650bacb7047c5b0 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 10 Nov 2024 
15:04:52 -0900 Subject: [PATCH 476/589] made it so weight for last output layer is only loaded when shard is last layer --- exo/inference/torch/models/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 8e4ec14d3..9e139238e 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -119,7 +119,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): ): remapped_state_dict[key] = value - if paried_lmhead: + if paried_lmhead and shard.is_last_layer(): remapped_state_dict['model.output.weight'] = paried_embed_weight model.load_state_dict(remapped_state_dict, strict=False) From 7f2abc3ad87a6cf8cb00c73067fff2866c8ae563 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 12 Nov 2024 06:03:56 -0900 Subject: [PATCH 477/589] working on sharding issue where hidden state is not working when being passed --- exo/inference/torch/models/llama3.py | 277 +++++++++++------- .../torch/tests/test_llama3_model.py | 83 +++++- 2 files changed, 241 insertions(+), 119 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index a8769edfd..2536f57fc 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -3,13 +3,14 @@ Written with pytorch using torchtune and other methods """ -from typing import Optional, Any, Tuple, List +from typing import Optional, Any, Tuple, List, Union, Callable import torch import torch.nn as nn import torchtune.modules as ttm import torchtune.generation as ttg from torchtune.models.llama3_1 import Llama3ScaledRoPE +from torchtune.modules.attention_utils import _MaskType from exo.inference.shard import Shard from exo.inference.torch.models.llm_utils import ( @@ -17,84 +18,101 @@ RMSNorm ) -class LlamaBlock(nn.Module): - """ - Encoder block class for the LLaMA model - """ +class ShardTransformerDecoder(ttm.TransformerDecoder): def __init__( self, - config, - mlp, - self_attn, - rms_norm_eps=1e-6 + *, + shard: Shard, + tok_embeddings: nn.Embedding, + layers: Union[nn.Module, List[nn.Module], nn.ModuleList], + max_seq_len: int, + num_heads: int, + head_dim: int, + norm: nn.Module, + output: Union[nn.Linear, Callable], + num_layers: Optional[int] = None, + output_hidden_states: Optional[List[int]] = None ): - super(LlamaBlock, self).__init__() - self.config = config - self.self_attn = self_attn - self.mlp = mlp - self.input_layernorm = RMSNorm(self.config['hidden_size'], eps=rms_norm_eps) - self.post_attention_layernorm = RMSNorm(self.config['hidden_size'], eps=rms_norm_eps) + super().__init__( + tok_embeddings=tok_embeddings, + layers=layers, + max_seq_len=max_seq_len, + num_heads=num_heads, + head_dim=head_dim, + norm=norm, + output=output, + num_layers=num_layers, + output_hidden_states=output_hidden_states, + ) + + self.shard = shard def forward( self, - hidden_states: torch.Tensor, - attention_mask: torch.Tensor, - max_seq_len: int = 2048 - ) -> torch.Tensor: - """ - Forward pass with integrated attention, resnet and key-value caching. - - Args: - hidden_states (torch.Tensor): Input tensor of shape (batch_size, seq_len, dim). - position_ids (Optional[torch.Tensor]): Position IDs tensor of shape (batch_size, seq_len). 
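(Aside, not part of the patch: the ShardTransformerDecoder.forward introduced in this patch is what lets one node hand hidden states to the next — a 3-D input is treated as hidden states rather than token ids, and only the shard's slice of layers runs. A framework-free sketch of that handoff between two shards, with toy modules and names that are illustrative only.)

import torch
import torch.nn as nn

class ToyShard(nn.Module):
  """Minimal stand-in for a sharded decoder: runs only layers[start:end]."""
  def __init__(self, layers, start, end, embed=None, head=None):
    super().__init__()
    self.layers, self.start, self.end = layers, start, end
    self.embed, self.head = embed, head

  def forward(self, x):
    # Token ids arrive as [batch, seq]; hidden states as [batch, seq, dim].
    h = self.embed(x) if x.ndim == 2 else x
    for layer in self.layers[self.start:self.end]:
      h = layer(h)
    # Only the shard holding the output head produces logits.
    return self.head(h) if self.head is not None else h

dim, vocab = 16, 32
layers = nn.ModuleList([nn.Linear(dim, dim) for _ in range(4)])
shard1 = ToyShard(layers, 0, 2, embed=nn.Embedding(vocab, dim))
shard2 = ToyShard(layers, 2, 4, head=nn.Linear(dim, vocab))

tokens = torch.randint(0, vocab, (1, 5))
logits = shard2(shard1(tokens))   # hidden-state handoff between shards
print(logits.shape)               # torch.Size([1, 5, 32])
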
+ tokens: torch.Tensor, + *, + mask: Optional[_MaskType] = None, + encoder_input: Optional[torch.Tensor] = None, + encoder_mask: Optional[torch.Tensor] = None, + input_pos: Optional[torch.Tensor] = None, + ) -> Union[torch.Tensor, List[torch.Tensor]]: + # for captured hidden states + hidden = [] + + # Determine the type of input and shape + print(f"tokens.ndim: {tokens.ndim}") + if tokens.ndim == 3: + h = tokens # Use directly as hidden states + else: + h = self.tok_embeddings(tokens) # Apply token tok_embeddings - Returns: - Tuple[torch.Tensor, KVCache]: - - Output tensor of shape (batch_size, seq_len, dim). - """ - if isinstance(self.self_attn, ttm.MultiHeadAttention): - if self.self_attn.kv_cache is None: - # setup cache - self.self_attn.setup_cache( - batch_size=hidden_states.size(0), - dtype=hidden_states.dtype, - max_seq_len=max_seq_len, #self.config['max_position_embeddings'] - ) + # capture tok hidden state, if needed + if 0 in self.output_hidden_states: + hidden.append(h) - # Apply RMSNorm to input - hidden_states = self.input_layernorm(hidden_states) - print(f"self.input_layernorm(hidden_states) {hidden_states.shape}") + seq_len = h.shape[1] - # get causal mask from attention mask - causal_mask = ttg.get_causal_mask_from_padding_mask( - attention_mask.bool(), - max_seq_len + self._validate_inputs( + seq_len, + mask=mask, + encoder_input=encoder_input, + encoder_mask=encoder_mask, + input_pos=input_pos, ) - print(f"causal_mask: {causal_mask.shape}") + # Initialize a list to capture hidden states if requested + hidden = [] + for i in range(self.shard.start_layer, self.shard.end_layer+1): + layer = self.layers[i] + + # Process through each transformer layer + h = layer( + h, + mask=mask, + encoder_input=encoder_input, + encoder_mask=encoder_mask, + input_pos=input_pos, + ) - # get position_ids from attention mask - position_ids = ttg.get_position_ids_from_padding_mask( - attention_mask.bool() - ) + # capture wanted hidden states + if i in self.output_hidden_states: + hidden.append(h) - print(f"position_ids: {position_ids.shape}") + print(f"\n\n\nhidden layer H[{i}]\n{h}\n\n\n") - hidden_states = self.self_attn( - x=hidden_states, - y=hidden_states, - mask=causal_mask, - #input_pos=position_ids - ) + # Apply normalization + h = self.norm(h) - # Residual connection - print(f"hidden_states: {hidden_states.shape}") - # Post attention normalization - hidden_states = self.post_attention_layernorm(hidden_states) - # Feed-forward network with MLP and residual connection - hidden_states = self.mlp(hidden_states) + # Handle chunked output if needed + if self.num_output_chunks > 0: + output = self.chunked_output(h) + else: + output = self.output(h).float() - return hidden_states + # Return list if hidden states are requested + output = output if not hidden else [*hidden, output] + print(f"\n\noutput {output}\n\n") + return output def LlamaModel( config: dict, @@ -170,7 +188,7 @@ def LlamaModel( layers.append(layer) - return ttm.TransformerDecoder( + return ShardTransformerDecoder( tok_embeddings=embed_tokens, layers=nn.ModuleList(layers), max_seq_len=max_seq_len, @@ -179,7 +197,8 @@ def LlamaModel( norm=RMSNorm(config["hidden_size"], eps=config["rms_norm_eps"]), output=nn.Linear(config["hidden_size"], config["vocab_size"]), num_layers=shard.n_layers, - #output_hidden_states=list(range(shard.start_layer, shard.end_layer)) + #output_hidden_states=list(range(shard.start_layer, shard.end_layer)), + shard=shard ) class ShardedLlamaModel(nn.Module): @@ -201,76 +220,114 @@ def __init__(self, def 
generate( self, - input_tensor: torch.Tensor, + tokens: torch.Tensor, + hidden_state: Optional[torch.Tensor] = None, max_seq_len: int=4096 ) -> Tuple[Optional[List[torch.Tensor]], Optional[torch.Tensor]]: """ Generate logits and/or hidden_states from llama model Args - input (torch.Tensor) - tokens if initial first layer input and hidden states after + tokens (torch.Tensor) - tokens from prompt tokenization + hidden_state (torch.Tensor, optional) - hidden state from last activated hidden layer, if any max_seq_len (int) - Max sequence length of generation, default 4096 """ - self.model.output_hidden_states = list(range(self.shard.start_layer, self.shard.end_layer)) - - if self.shard.is_first_layer(): - tokens = input_tensor + print(self.shard) + print(self.shard.is_last_layer()) + if not self.shard.is_last_layer(): + self.model.output_hidden_states = [self.shard.end_layer] + + if tokens.ndim == 1: + tokens = tokens.view(1, -1) + + _, tokens_length = tokens.size() + total_response_length = tokens_length + max_seq_len + resp_max_seq_len = ( + total_response_length + if not self.model.caches_are_enabled() + else self.model.decoder_max_cache_seq_len + ) - if tokens.ndim == 1: - tokens = tokens.view(1, -1) + # clone tokens + generated_tokens = tokens.clone() - _, tokens_length = tokens.size() - total_response_length = tokens_length + max_seq_len - resp_max_seq_len = ( - total_response_length - if not self.model.caches_are_enabled() - else self.model.decoder_max_cache_seq_len + # masking for proper attention + padding_masks = generated_tokens != self.tokenizer.pad_id + if not padding_masks.all(): + padding_masks = torch.nn.functional.pad( + padding_masks, + (0, max_seq_len), + value=True ) - # masking for proper attention - padding_masks = tokens != self.tokenizer.pad_id - if not padding_masks.all(): - padding_masks = torch.nn.functional.pad( - padding_masks, - (0, max_seq_len), - value=True - ) + masks = ttg.get_causal_mask_from_padding_mask( + padding_masks, + target_seq_len=resp_max_seq_len + ) - masks = ttg.get_causal_mask_from_padding_mask( - padding_masks, - target_seq_len=resp_max_seq_len + input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) + else: + masks = torch.tril( + torch.ones( + total_response_length, + resp_max_seq_len if resp_max_seq_len is not None else total_response_length, + dtype=torch.bool, + device=tokens.device, ) - - input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) - else: - masks = torch.tril( - torch.ones( - total_response_length, - resp_max_seq_len if resp_max_seq_len is not None else max_seq_len, - dtype=torch.bool, - device=tokens.device, - ) - ).unsqueeze(0) + ).unsqueeze(0) input_pos = torch.arange( - 0, total_response_length, device=tokens.device + 0, total_response_length, device=generated_tokens.device ).unsqueeze(0) - if self.model.caches_are_enabled(): - curr_masks = masks[:, :tokens_length] - else: - curr_masks = masks[:, :tokens_length, :tokens_length] + if self.model.caches_are_enabled(): + curr_masks = masks[:, :tokens_length] + else: + curr_masks = masks[:, :tokens_length, :tokens_length] + + if hidden_state is not None: + #_, hs_len, _ = hidden_state.size() + #total_hidden_length = hs_len + max_seq_len + #hs_max_seq_len = ( + # total_response_length + # if not self.model.caches_are_enabled() + # else self.model.decoder_max_cache_seq_len + #) + + #hs_mask = torch.tril( + # torch.ones( + # total_hidden_length, + # hs_max_seq_len if hs_max_seq_len is not None else max_seq_len, + # dtype=torch.bool, + # device=tokens.device, + 
# ) + #).unsqueeze(0) + + #if self.model.caches_are_enabled(): + #hs_curr_masks = hs_mask[:, :hs_len] + #else: + #hs_curr_masks = hs_mask[:, :hs_len, :hs_len] + model_output = self.model( + tokens=hidden_state, + mask=curr_masks, + input_pos=input_pos[:, :tokens_length].squeeze(), + ) + else: model_output = self.model( tokens=tokens, mask=curr_masks, input_pos=input_pos[:, :tokens_length].squeeze() ) + print(f"\nmodel_output: {model_output}") + + if isinstance(model_output, list): model_logits = model_output[-1] model_output.pop() # remove logits - model_hs = model_output # hidden states - - return model_hs, model_logits + model_hs = model_output[-1] # get last hidden state else: - return None, None + model_logits = model_output + model_hs = None + + return model_hs, model_logits diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index 71fcbfd87..d742ff9c3 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -27,7 +27,7 @@ TOP_P=0.9 MAX_SEQ_LEN=2048 -def test_generation(text, max_length=10, config=None): +def test_generation_1(shard_model, text): """ Test the generation capabilities of the LlamaModel with sample text. """ @@ -49,10 +49,12 @@ def test_generation(text, max_length=10, config=None): hidden_states, logits = shard_model.generate(prompt) if hidden_states is not None: - print(f"hidden_states: {hidden_states[0].shape}\n{hidden_states}") + print(f"hidden_states[{len(hidden_states)}]: {hidden_states}") if logits is not None: print(f"logits: {logits.shape}\n{logits}") + + return hidden_states, logits, prompt #if prompt.ndim == 1: # prompt = prompt.view(1, -1) @@ -155,6 +157,44 @@ def test_generation(text, max_length=10, config=None): #generated_tokens = generated_tokens.tolist() #print(f"resp: {llama_tokenizer.decode(generated_tokens[0])}") +def test_generation_2(shard_model, tokens, hidden_state): + print("Generate with the rest of layers") + hidden_states, logits = shard_model.generate( + tokens=tokens, + hidden_state=hidden_state + ) + + if hidden_states is not None: + print(f"hidden_states {hidden_states.shape}: {hidden_states}") + + if logits is not None: + print(f"logits: {logits.shape}\n{logits}") + + rand_sample = torch.empty( + ( + logits.size(0), + shard_model.model.tok_embeddings.num_embeddings + ), device=logits.device + ).exponential_(1, generator=None) + + logit = ttg.sample( + logits=logits[:, -1].clone(), + temperature=TEMP, + top_k=TOP_K, + q=rand_sample + ) + + print(f"logit: {logit}") + + generated_tokens = tokens.clone() + generated_tokens = torch.cat([generated_tokens, logit.squeeze(-1)], dim=-1).tolist() + + print(f"generated_tokens: {generated_tokens}") + + print(f"resp: {llama_tokenizer.decode(generated_tokens)}\n\n\n") + + return hidden_states, logits + if __name__ == "__main__": print("\nTesting generation:") # Get the path to the model files from the Hugging Face cache @@ -167,10 +207,20 @@ def test_generation(text, max_length=10, config=None): print(f"current config\n{config}") # Setup shard - shard = Shard( + s1_end = int(int(config["num_hidden_layers"])/2) + shard_1 = Shard( model_id=MODEL_NAME, start_layer=0, - end_layer=4,#int(config["num_hidden_layers"]), + end_layer=s1_end, + n_layers=int(config["num_hidden_layers"]) + ) + + s2_start = s1_end + 1 + s2_end = shard_1.n_layers - 1 + shard_2 = Shard( + model_id=MODEL_NAME, + start_layer=s2_start, + end_layer=s2_end, n_layers=int(config["num_hidden_layers"]) ) @@ -183,12 +233,27 @@ def 
test_generation(text, max_length=10, config=None): #) # Initialize LlamaModel with config and tokenizer - shard_model = ShardedLlamaModel(config, shard, llama_tokenizer) - print(f"\nshard_model: {shard_model}") - load_model_weights_torchtune(cache_dir, shard, shard_model) + shard_model_1 = ShardedLlamaModel(config, shard_1, llama_tokenizer) + print(f"\nshard_model_1: {shard_model_1}") + load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) # Sample text for testing - test_text = "Hello" + #prompt = "First letter in the word 'Red'" + prompt = "Hello" + shard_1_hs, shard_1_logits, shard_1_tokens = test_generation_1(shard_model_1, prompt) + + print(f"shard_1_hs:\n{shard_1_hs}") + print(f"shard_1_logits:\n{shard_1_logits}") + print(f"shard_1_tokens:\n{shard_1_tokens}") + + del shard_model_1.model + del shard_model_1 + + shard_model_2 = ShardedLlamaModel(config, shard_2, llama_tokenizer) + print(f"\nshard_model_2: {shard_model_2}") + load_model_weights_torchtune(cache_dir, shard_2, shard_model_2) + shard_2_hs, shard_2_logits = test_generation_2(shard_model_2, shard_1_tokens, shard_1_hs) - test_generation(test_text, 5, config) + print(f"shard_2_hs:\n{shard_2_hs}") + print(f"shard_2_logits:\n{shard_2_logits}") From bdf3240481349ba52a3f6313eed5f2c3a169e7b5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 14 Nov 2024 23:56:51 -0900 Subject: [PATCH 478/589] fixing last hidden value handling --- exo/inference/torch/models/llama3.py | 47 ++++++++++--------- .../torch/tests/test_llama3_model.py | 14 ++++-- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 2536f57fc..c29979d9d 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -56,9 +56,6 @@ def forward( encoder_mask: Optional[torch.Tensor] = None, input_pos: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor, List[torch.Tensor]]: - # for captured hidden states - hidden = [] - # Determine the type of input and shape print(f"tokens.ndim: {tokens.ndim}") if tokens.ndim == 3: @@ -66,10 +63,6 @@ def forward( else: h = self.tok_embeddings(tokens) # Apply token tok_embeddings - # capture tok hidden state, if needed - if 0 in self.output_hidden_states: - hidden.append(h) - seq_len = h.shape[1] self._validate_inputs( @@ -81,9 +74,13 @@ def forward( ) # Initialize a list to capture hidden states if requested - hidden = [] + # for captured hidden states + hidden = None + for i in range(self.shard.start_layer, self.shard.end_layer+1): - layer = self.layers[i] + layer = self.layers[i] + + print(f"\nhidden layer in H[{i}]\n{h}\n") # Process through each transformer layer h = layer( @@ -94,12 +91,13 @@ def forward( input_pos=input_pos, ) - # capture wanted hidden states - if i in self.output_hidden_states: - hidden.append(h) + # for shard model just capture the last hs computed + if i == self.shard.end_layer: + hidden = h - print(f"\n\n\nhidden layer H[{i}]\n{h}\n\n\n") + print(f"\nhidden layer out H[{i}]->H[{i+1}]\n{h}\n") + print(f"last hidden: {hidden}") # Apply normalization h = self.norm(h) @@ -110,7 +108,7 @@ def forward( output = self.output(h).float() # Return list if hidden states are requested - output = output if not hidden else [*hidden, output] + output = [hidden, output] print(f"\n\noutput {output}\n\n") return output @@ -207,8 +205,8 @@ def __init__(self, shard: Shard, tokenizer: Any, device: torch.device=torch.device("cpu"), - hidden_states: Optional[torch.Tensor] = None, - is_causal=True + 
is_causal=True, + use_cache=False ): super(ShardedLlamaModel, self).__init__() @@ -217,6 +215,7 @@ def __init__(self, self.config = config self.model = LlamaModel(config, shard, is_causal) self.device = device + self.use_cache = use_cache def generate( self, @@ -234,13 +233,19 @@ def generate( """ print(self.shard) print(self.shard.is_last_layer()) - if not self.shard.is_last_layer(): - self.model.output_hidden_states = [self.shard.end_layer] if tokens.ndim == 1: tokens = tokens.view(1, -1) - _, tokens_length = tokens.size() + bsz, tokens_length = tokens.size() + + # setup cache + if not self.model.caches_are_enabled() and self.use_cache: + self.model.setup_caches(bsz, torch.float, decoder_max_seq_len=self.model.decoder_max_cache_seq_len) + + if not self.shard.is_last_layer(): + self.model.output_hidden_states = [self.shard.end_layer] + total_response_length = tokens_length + max_seq_len resp_max_seq_len = ( total_response_length @@ -323,9 +328,9 @@ def generate( print(f"\nmodel_output: {model_output}") if isinstance(model_output, list): - model_logits = model_output[-1] + model_logits = model_output[1] model_output.pop() # remove logits - model_hs = model_output[-1] # get last hidden state + model_hs = model_output[0] # get last hidden state else: model_logits = model_output model_hs = None diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index d742ff9c3..d573cf73a 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -2,7 +2,8 @@ Test of pytorch based llama3 model """ from pathlib import Path - +import gc +import time import torch from transformers import AutoTokenizer from huggingface_hub import snapshot_download @@ -246,14 +247,19 @@ def test_generation_2(shard_model, tokens, hidden_state): print(f"shard_1_logits:\n{shard_1_logits}") print(f"shard_1_tokens:\n{shard_1_tokens}") + gc.collect() + torch.cuda.empty_cache() + + if shard_model_1.model.caches_are_enabled(): + shard_model_1.model.reset_caches() + del shard_model_1.model del shard_model_1 + #time.sleep(10) + shard_model_2 = ShardedLlamaModel(config, shard_2, llama_tokenizer) print(f"\nshard_model_2: {shard_model_2}") load_model_weights_torchtune(cache_dir, shard_2, shard_model_2) shard_2_hs, shard_2_logits = test_generation_2(shard_model_2, shard_1_tokens, shard_1_hs) - print(f"shard_2_hs:\n{shard_2_hs}") - print(f"shard_2_logits:\n{shard_2_logits}") - From 227199f720bbe3b732836155ecfe02346de55e3c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 15 Nov 2024 00:07:40 -0900 Subject: [PATCH 479/589] update test --- exo/inference/torch/tests/test_llama3_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index d573cf73a..8b5799f8e 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -192,7 +192,7 @@ def test_generation_2(shard_model, tokens, hidden_state): print(f"generated_tokens: {generated_tokens}") - print(f"resp: {llama_tokenizer.decode(generated_tokens)}\n\n\n") + print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens)}\n\n\n") return hidden_states, logits From 5af630268c31942502d7be10d256b31a7c852589 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 15 Nov 2024 00:09:14 -0900 Subject: [PATCH 480/589] update test --- exo/inference/torch/tests/test_llama3_model.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index 8b5799f8e..0ef2748e1 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -240,7 +240,7 @@ def test_generation_2(shard_model, tokens, hidden_state): # Sample text for testing #prompt = "First letter in the word 'Red'" - prompt = "Hello" + prompt = "GM, say it back" shard_1_hs, shard_1_logits, shard_1_tokens = test_generation_1(shard_model_1, prompt) print(f"shard_1_hs:\n{shard_1_hs}") From d7e5aca57a7cecf17e84019f4ded0a3b7bf63143 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 15 Nov 2024 00:11:32 -0900 Subject: [PATCH 481/589] update test --- exo/inference/torch/tests/test_llama3_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index 0ef2748e1..22abb2781 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -192,7 +192,7 @@ def test_generation_2(shard_model, tokens, hidden_state): print(f"generated_tokens: {generated_tokens}") - print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens)}\n\n\n") + print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(logit.squeeze(-1).tolist())}\n\n\n") return hidden_states, logits From 1874d2307f52cac6b3898c46b6aea034ae9b798d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 15 Nov 2024 00:14:18 -0900 Subject: [PATCH 482/589] update test, turn on caching --- exo/inference/torch/tests/test_llama3_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py index 22abb2781..1b16d91f0 100644 --- a/exo/inference/torch/tests/test_llama3_model.py +++ b/exo/inference/torch/tests/test_llama3_model.py @@ -234,7 +234,7 @@ def test_generation_2(shard_model, tokens, hidden_state): #) # Initialize LlamaModel with config and tokenizer - shard_model_1 = ShardedLlamaModel(config, shard_1, llama_tokenizer) + shard_model_1 = ShardedLlamaModel(config, shard_1, llama_tokenizer, use_cache=True) print(f"\nshard_model_1: {shard_model_1}") load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) @@ -258,7 +258,7 @@ def test_generation_2(shard_model, tokens, hidden_state): #time.sleep(10) - shard_model_2 = ShardedLlamaModel(config, shard_2, llama_tokenizer) + shard_model_2 = ShardedLlamaModel(config, shard_2, llama_tokenizer, use_cache=True) print(f"\nshard_model_2: {shard_model_2}") load_model_weights_torchtune(cache_dir, shard_2, shard_model_2) shard_2_hs, shard_2_logits = test_generation_2(shard_model_2, shard_1_tokens, shard_1_hs) From 3a0ad62226d8b18967581cd197999ae5c10f3193 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 15 Nov 2024 00:19:05 -0900 Subject: [PATCH 483/589] test safetensor load --- exo/inference/torch/models/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 9e139238e..8e4ec14d3 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -119,7 +119,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): ): remapped_state_dict[key] = value - if paried_lmhead and shard.is_last_layer(): + if paried_lmhead: 
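+      # note: with tied (paired) embeddings there is no separate lm_head.weight, so the token embedding matrix is reused as the output projection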
remapped_state_dict['model.output.weight'] = paried_embed_weight model.load_state_dict(remapped_state_dict, strict=False) From 6098ae5324845d4520bc2f9158720dde8251face Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 15 Nov 2024 01:00:15 -0900 Subject: [PATCH 484/589] test hidden alignment --- exo/inference/torch/models/llama3.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index c29979d9d..3280600ee 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -60,6 +60,10 @@ def forward( print(f"tokens.ndim: {tokens.ndim}") if tokens.ndim == 3: h = tokens # Use directly as hidden states + + # check states alignment + align_check = self.layers[0].in_features == h.shape[-1] + print(f"align_check {align_check}") else: h = self.tok_embeddings(tokens) # Apply token tok_embeddings From fa1e70fdc93b73ccb46054d2986540574f03ac4f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 17 Nov 2024 05:53:04 -0900 Subject: [PATCH 485/589] updates to torchtune model, fixing non-generation errors, created split and full test, separating huggingface and torchtune inference engines --- .../torch/{inference.py => hf_inference.py} | 5 +- exo/inference/torch/models/llama3.py | 266 +++---- exo/inference/torch/models/llm_utils.py | 711 ++++-------------- exo/inference/torch/pt_inference.py | 5 + .../torch/tests/test_inference_engine.py | 6 +- exo/inference/torch/tests/test_llama3_full.py | 121 +++ .../torch/tests/test_llama3_model.py | 265 ------- .../torch/tests/test_llama3_split.py | 131 ++++ 8 files changed, 524 insertions(+), 986 deletions(-) rename exo/inference/torch/{inference.py => hf_inference.py} (98%) create mode 100644 exo/inference/torch/pt_inference.py create mode 100644 exo/inference/torch/tests/test_llama3_full.py delete mode 100644 exo/inference/torch/tests/test_llama3_model.py create mode 100644 exo/inference/torch/tests/test_llama3_split.py diff --git a/exo/inference/torch/inference.py b/exo/inference/torch/hf_inference.py similarity index 98% rename from exo/inference/torch/inference.py rename to exo/inference/torch/hf_inference.py index 23bbe814a..1b4f19e00 100644 --- a/exo/inference/torch/inference.py +++ b/exo/inference/torch/hf_inference.py @@ -25,9 +25,10 @@ TEMP = 0.6 TOP_P = 0.9 -class TorchDynamicShardInferenceEngine(InferenceEngine): +class HFDynamicShardInferenceEngine(InferenceEngine): """ - Torch Dynamic Shard Inference Engine for performing model inference with sharded Pytorch/HF based models. + HuggingFace Dynamic Shard Inference Engine + Performing model inference with sharded Pytorch based HuggingFace models. 
""" def __init__(self, shard_downloader: HFShardDownloader): diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 3280600ee..cb3456ebb 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -3,6 +3,7 @@ Written with pytorch using torchtune and other methods """ + from typing import Optional, Any, Tuple, List, Union, Callable import torch @@ -15,10 +16,17 @@ from exo.inference.shard import Shard from exo.inference.torch.models.llm_utils import ( MultiLayerPreceptron, - RMSNorm + RMSNorm, + get_torch_dtype ) + class ShardTransformerDecoder(ttm.TransformerDecoder): + """ + ShardTransformerDecorder + Custom version of torchtune TransformerDecoder to allow for + sharding of models and passing of hidden layers between shards + """ def __init__( self, *, @@ -31,7 +39,7 @@ def __init__( norm: nn.Module, output: Union[nn.Linear, Callable], num_layers: Optional[int] = None, - output_hidden_states: Optional[List[int]] = None + output_hidden_states: Optional[List[int]] = None, ): super().__init__( tok_embeddings=tok_embeddings, @@ -57,34 +65,29 @@ def forward( input_pos: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor, List[torch.Tensor]]: # Determine the type of input and shape - print(f"tokens.ndim: {tokens.ndim}") if tokens.ndim == 3: h = tokens # Use directly as hidden states - - # check states alignment - align_check = self.layers[0].in_features == h.shape[-1] - print(f"align_check {align_check}") else: h = self.tok_embeddings(tokens) # Apply token tok_embeddings - seq_len = h.shape[1] + seq_len = h.shape[1] - self._validate_inputs( - seq_len, - mask=mask, - encoder_input=encoder_input, - encoder_mask=encoder_mask, - input_pos=input_pos, - ) + self._validate_inputs( + seq_len, + mask=mask, + encoder_input=encoder_input, + encoder_mask=encoder_mask, + input_pos=input_pos, + ) # Initialize a list to capture hidden states if requested # for captured hidden states - hidden = None + hidden = [] - for i in range(self.shard.start_layer, self.shard.end_layer+1): + for i in range(self.shard.start_layer, self.shard.end_layer + 1): layer = self.layers[i] - print(f"\nhidden layer in H[{i}]\n{h}\n") + print(f"\nhidden layer in H[{i}]\n{h}\nmask\n{mask}\ninput_pos\n{input_pos}\n{self.output_hidden_states}\n") # Process through each transformer layer h = layer( @@ -95,138 +98,141 @@ def forward( input_pos=input_pos, ) - # for shard model just capture the last hs computed - if i == self.shard.end_layer: - hidden = h + if i in self.output_hidden_states: + hidden.append(h) - print(f"\nhidden layer out H[{i}]->H[{i+1}]\n{h}\n") + print(f"\nhidden layer out H[{i}]->H[{i + 1}]\n{h}\n") - print(f"last hidden: {hidden}") # Apply normalization h = self.norm(h) # Handle chunked output if needed if self.num_output_chunks > 0: - output = self.chunked_output(h) + output = self.chunked_output(h) else: - output = self.output(h).float() + output = self.output(h).float() # Return list if hidden states are requested - output = [hidden, output] + output = [hidden[-1], output] if hidden else output print(f"\n\noutput {output}\n\n") return output -def LlamaModel( - config: dict, - shard: Shard, - is_causal: bool=True, - max_seq_len: int=4096 -): +def LlamaModel(config: dict, shard: Shard): """ LlamaModel using torchtune """ - # Load configurations from config - rope_scaling = config.get("rope_scaling") - hidden_head_dim = config["hidden_size"] // config["num_attention_heads"] - - # Model layers and methods, order matters - embed_tokens = 
nn.Embedding( - config["vocab_size"], - config["hidden_size"] + # rope scaling config + if config["rope_scaling"] is not None: + scale_factor = config["rope_scaling"].get("factor", 32) + + rope = Llama3ScaledRoPE( + dim=config["head_dim"], + max_seq_len=config["max_seq_len"], + base=config["rope_base"], + scale_factor=scale_factor, ) layers = [] for _ in range(shard.n_layers): - pos_embeddings = Llama3ScaledRoPE( - dim=hidden_head_dim, - max_seq_len=max_seq_len, - base=config.get('rope_theta', 10000), - scale_factor=rope_scaling['factor'] if rope_scaling else 32 - ) - self_attn = ttm.MultiHeadAttention( - embed_dim=config["hidden_size"], - num_heads=config["num_attention_heads"], - num_kv_heads=config["num_key_value_heads"], - head_dim=hidden_head_dim, + embed_dim=config["embed_dim"], + num_heads=config["num_heads"], + num_kv_heads=config["num_kv_heads"], + head_dim=config["head_dim"], q_proj=nn.Linear( - config["hidden_size"], - config["num_attention_heads"] * config["head_dim"], - bias=config.get('attention_bias', False) + config["embed_dim"], + config["num_heads"] * config["head_dim"], + bias=config["attn_bias"], ), - k_proj = nn.Linear( - config["hidden_size"], - config["num_key_value_heads"] * config["head_dim"], - bias=config.get('attention_bias', False) + k_proj=nn.Linear( + config["embed_dim"], + config["num_kv_heads"] * config["head_dim"], + bias=config["attn_bias"], ), - v_proj = nn.Linear( - config["hidden_size"], - config["num_key_value_heads"] * config["head_dim"], - bias=config.get('attention_bias', False) + v_proj=nn.Linear( + config["embed_dim"], + config["num_kv_heads"] * config["head_dim"], + bias=config["attn_bias"], ), output_proj=nn.Linear( - config["hidden_size"], - config["hidden_size"], - bias=config.get('attention_bias', False) + config["embed_dim"], + config["embed_dim"], + bias=config["attn_bias"], ), - max_seq_len=max_seq_len, - is_causal=is_causal, - attn_dropout=config.get('attention_dropout', 0.0), - pos_embeddings=pos_embeddings + max_seq_len=config["max_seq_len"], + attn_dropout=config["attn_dropout"], + pos_embeddings=rope, ) mlp = MultiLayerPreceptron( - config["hidden_size"], - config['intermediate_size'], - 'silu' + config["embed_dim"], + config["intermediate_dim"], + config["hidden_act"] ) layer = ttm.TransformerSelfAttentionLayer( attn=self_attn, mlp=mlp, - sa_norm=RMSNorm(config["hidden_size"], eps=config["rms_norm_eps"]), - mlp_norm=RMSNorm(config["hidden_size"], eps=config["rms_norm_eps"]) + sa_norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), + mlp_norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), ) layers.append(layer) + + layers = nn.ModuleList(layers) + tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) + # output_proj = ttm.TiedLinear(tok_embeddings) + output_proj = nn.Linear( + config["embed_dim"], + config["vocab_size"], + bias=config["attn_bias"], + ) return ShardTransformerDecoder( - tok_embeddings=embed_tokens, - layers=nn.ModuleList(layers), - max_seq_len=max_seq_len, - num_heads=config["num_attention_heads"], + tok_embeddings=tok_embeddings, + shard=shard, + layers=layers, + max_seq_len=config["max_seq_len"], + num_heads=config["num_heads"], head_dim=config["head_dim"], - norm=RMSNorm(config["hidden_size"], eps=config["rms_norm_eps"]), - output=nn.Linear(config["hidden_size"], config["vocab_size"]), - num_layers=shard.n_layers, - #output_hidden_states=list(range(shard.start_layer, shard.end_layer)), - shard=shard + norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), + output=output_proj, + 
num_layers=config["num_layers"] ) + # return ttm.TransformerDecoder( + # tok_embeddings=tok_embeddings, + # layers=layers, + # max_seq_len=config["max_seq_len"], + # num_heads=config["num_heads"], + # head_dim=config["head_dim"], + # norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), + # output=output_proj, + # num_layers=config["num_layers"], + # ) + + class ShardedLlamaModel(nn.Module): - def __init__(self, - config: dict, - shard: Shard, - tokenizer: Any, - device: torch.device=torch.device("cpu"), - is_causal=True, - use_cache=False - ): + def __init__( + self, + config: dict, + shard: Shard, + tokenizer: Any, + device: Optional[torch.device] = None, + max_seq_len: Optional[int] = None + ): super(ShardedLlamaModel, self).__init__() self.tokenizer = tokenizer self.shard = shard self.config = config - self.model = LlamaModel(config, shard, is_causal) - self.device = device - self.use_cache = use_cache + self.dtype = get_torch_dtype(self.config["torch_dtype"]) if "torch_dtype" in self.config else torch.float + self.device = device if device is not None else torch.device("cpu") + self.use_cache = self.config.get("use_cache", False) + self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) + self.max_seq_len = max_seq_len if max_seq_len is not None else 4096 - def generate( - self, - tokens: torch.Tensor, - hidden_state: Optional[torch.Tensor] = None, - max_seq_len: int=4096 - ) -> Tuple[Optional[List[torch.Tensor]], Optional[torch.Tensor]]: + def generate(self, tokens: torch.Tensor, hidden_state: Optional[torch.Tensor] = None) -> Tuple[Optional[List[torch.Tensor]], Optional[torch.Tensor]]: """ Generate logits and/or hidden_states from llama model @@ -245,17 +251,14 @@ def generate( # setup cache if not self.model.caches_are_enabled() and self.use_cache: - self.model.setup_caches(bsz, torch.float, decoder_max_seq_len=self.model.decoder_max_cache_seq_len) + with self.device: + self.model.setup_caches(bsz, self.dtype, decoder_max_seq_len=self.model.decoder_max_cache_seq_len) if not self.shard.is_last_layer(): self.model.output_hidden_states = [self.shard.end_layer] - total_response_length = tokens_length + max_seq_len - resp_max_seq_len = ( - total_response_length - if not self.model.caches_are_enabled() - else self.model.decoder_max_cache_seq_len - ) + total_response_length = tokens_length + self.max_seq_len + resp_max_seq_len = total_response_length if not self.model.caches_are_enabled() else self.model.decoder_max_cache_seq_len # clone tokens generated_tokens = tokens.clone() @@ -263,16 +266,9 @@ def generate( # masking for proper attention padding_masks = generated_tokens != self.tokenizer.pad_id if not padding_masks.all(): - padding_masks = torch.nn.functional.pad( - padding_masks, - (0, max_seq_len), - value=True - ) + padding_masks = torch.nn.functional.pad(padding_masks, (0, self.max_seq_len), value=True) - masks = ttg.get_causal_mask_from_padding_mask( - padding_masks, - target_seq_len=resp_max_seq_len - ) + masks = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=resp_max_seq_len) input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) else: @@ -285,56 +281,34 @@ def generate( ) ).unsqueeze(0) - input_pos = torch.arange( - 0, total_response_length, device=generated_tokens.device - ).unsqueeze(0) - + input_pos = torch.arange(0, total_response_length, device=generated_tokens.device).unsqueeze(0) + if self.model.caches_are_enabled(): curr_masks = masks[:, :tokens_length] else: curr_masks = masks[:, :tokens_length, 
:tokens_length] - if hidden_state is not None: - #_, hs_len, _ = hidden_state.size() - #total_hidden_length = hs_len + max_seq_len - #hs_max_seq_len = ( - # total_response_length - # if not self.model.caches_are_enabled() - # else self.model.decoder_max_cache_seq_len - #) - - #hs_mask = torch.tril( - # torch.ones( - # total_hidden_length, - # hs_max_seq_len if hs_max_seq_len is not None else max_seq_len, - # dtype=torch.bool, - # device=tokens.device, - # ) - #).unsqueeze(0) - - #if self.model.caches_are_enabled(): - #hs_curr_masks = hs_mask[:, :hs_len] - #else: - #hs_curr_masks = hs_mask[:, :hs_len, :hs_len] + input_pos = input_pos[:, :tokens_length].squeeze() + if hidden_state is not None: model_output = self.model( tokens=hidden_state, mask=curr_masks, - input_pos=input_pos[:, :tokens_length].squeeze(), + input_pos=input_pos, ) else: model_output = self.model( tokens=tokens, mask=curr_masks, - input_pos=input_pos[:, :tokens_length].squeeze() + input_pos=input_pos, ) print(f"\nmodel_output: {model_output}") if isinstance(model_output, list): model_logits = model_output[1] - model_output.pop() # remove logits - model_hs = model_output[0] # get last hidden state + model_output.pop() # remove logits + model_hs = model_output[0] # get last hidden state else: model_logits = model_output model_hs = None diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 8e4ec14d3..f1a60e100 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -1,6 +1,7 @@ """ Utility methods used by LLMs """ + import re import json from pathlib import Path @@ -10,21 +11,30 @@ import torch.nn as nn import torch.nn.functional as F import torchtune.modules as ttm +from torchtune.models.convert_weights import hf_to_tune import math from safetensors.torch import load_file as load_safetensors -from transformers import ( - LogitsProcessorList, - TopKLogitsWarper, - TopPLogitsWarper, - TemperatureLogitsWarper -) +from transformers import LogitsProcessorList, TopKLogitsWarper, TopPLogitsWarper, TemperatureLogitsWarper from transformers.cache_utils import Cache, DynamicCache from exo.helpers import DEBUG from exo.inference.shard import Shard + +def get_torch_dtype(dtype_str: str) -> torch.dtype: + """ + Get dtype from setting in model's config.json + """ + if dtype_str == "bfloat16": + return torch.bfloat16 + elif dtype_str == "float16": + return torch.float16 + else: + return torch.float16 + + def load_model_config(model_config_path: Path) -> dict: """ Loads the config.json of the model @@ -37,9 +47,28 @@ def load_model_config(model_config_path: Path) -> dict: """ model_config = {} with open(model_config_path, "r") as f: - model_config = json.load(f) + base_config = json.load(f) + + model_config = { + "rope_scaling": base_config.get("rope_scaling"), + "embed_dim": base_config["hidden_size"], + "num_heads": base_config["num_attention_heads"], + "head_dim": base_config["hidden_size"] // base_config["num_attention_heads"], # Assuming embed_dim = hidden_size + "num_kv_heads": base_config["num_key_value_heads"], + "max_seq_len": base_config["max_position_embeddings"], + "intermediate_dim": base_config["intermediate_size"], + "attn_dropout": base_config.get("attention_dropout", 0.0), + "norm_eps": base_config["rms_norm_eps"], + "rope_base": base_config["rope_theta"], + "vocab_size": base_config["vocab_size"], + "num_layers": base_config["num_hidden_layers"], + "attn_bias": base_config.get("attention_bias", False), + "hidden_act": 
base_config.get("hidden_act", "silu") + } + return model_config + def check_weights(model, state_dict): """ Verifies that the weights from the state dictionary are properly loaded into the model. @@ -53,11 +82,12 @@ def check_weights(model, state_dict): print(f"Shape mismatch for {name}: expected {param.shape}, got {loaded_param.shape}") else: print(f"{name}: loaded correctly") - + for name in state_dict: if name not in model_state_dict: print(f"Unexpected weight {name} found in state_dict") + def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): """ Loads weights from huggingface and changes it to match torchtune naming structure @@ -70,231 +100,91 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # Load weights from each found safetensors file paried_lmhead = True shard_layer_range = list(range(shard.start_layer, shard.end_layer)) + + full_state_dict = None for safetensor_file in safetensors_files: state_dict = load_safetensors(safetensor_file) - # remap to work with our model - remapped_state_dict = {} - paried_embed_weight = None - for key, value in state_dict.items(): - # load layer by shard - lnrgx = re.findall(r'model\.layers\.(\d+).*', key) - layer_num = int(lnrgx[0]) if len(lnrgx) > 0 else None - if layer_num in shard_layer_range: - # change input layer norm to sa_norm for torchtune - re_iln = re.findall( - rf'model.layers\.{layer_num}\.(input_layernorm)\.weight', key) - if len(re_iln) != 0: - remapped_state_dict[f"model.layers.{layer_num}.sa_norm.weight"] = value - - # change post attention layernorm to mlp_norm for torchtune - re_pal = re.findall( - rf'model.layers\.{layer_num}\.(post_attention_layernorm)\.weight', key) - if len(re_pal) != 0: - remapped_state_dict[f"model.layers.{layer_num}.mlp_norm.weight"] = value - - # change self_attn to attn - # along with changing o_proj to output_proj - re_attn = re.findall(rf'model\.layers\.{layer_num}.(\w+)\.(\w+)\.(\w+)', key) - if len(re_attn) != 0 and re_attn[0][0] == "self_attn": - if re_attn[0][1] == "o_proj": - remapped_state_dict[f"model.layers.{layer_num}.attn.output_proj.weight"] = value - else: - remapped_state_dict[f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}"] = value - - # saving embed for paired weights - elif key == 'model.embed_tokens.weight': - paried_embed_weight = value - # change name for torchtune - remapped_state_dict['model.tok_embeddings.weight'] = value - - elif key == 'lm_head.weight': - paried_lmhead = False - - # get everything else except layers, embed_tokens and lm_head - if ( - len(re.findall(r'model\.layers\..*', key)) == 0 - and key != "model.embed_tokens.weight" - and key != "lm_head.weight" - ): - remapped_state_dict[key] = value - - if paried_lmhead: - remapped_state_dict['model.output.weight'] = paried_embed_weight - - model.load_state_dict(remapped_state_dict, strict=False) - - #if DEBUG >= 7: - print("\n--- checking weights ----\n") - print(f"\nremapped_state_dict: {remapped_state_dict.keys()}\n") - check_weights(model, remapped_state_dict) - -def hf_logit_sample( - logits, - input_ids, - use_max: bool=False, - top_k: int=0, - top_p: float=0.9, - temp: float=1.0, -) -> torch.Tensor: - """ - Logit sampling using transformers - """ - logits_processor = LogitsProcessorList([ - TopKLogitsWarper(top_k), - TemperatureLogitsWarper(temp), - TopPLogitsWarper(top_p) - ]) - - # get a single cloned logit - logits = logits[:, -1, :].clone().float() - - next_token_scores = logits_processor(input_ids, logits) - - if not use_max: - probs = 
nn.functional.softmax(next_token_scores, dim=-1) - next_token = torch.multinomial(probs, num_samples=1) - else: - next_token = torch.argmax(next_token_scores, dim=-1) - - if DEBUG >= 4: - print(f"input_ids: {input_ids}") - print(f"next_token: {next_token}") - - return next_token[:, None].squeeze(-1) - -def create_4d_causal_attention_mask( - attention_mask: torch.Tensor, - seq_len: int, - target_len: int, - dtype: torch.dtype, - device: torch.device, - cache_pos: torch.Tensor, - batch_size: int, -) -> torch.Tensor: - """ - Creates a 4D causal attention mask from a 2D mask - - Args: - attention_mask (torch.Tensor): - A 2D tensor of shape (batch_size, key_value_length) or a 4D tensor of shape - (batch_size, 1, query_length, key_value_length). - seq_len (int): - Sequence length of the input being processed. - target_len (int): - Target length to generate the causal mask. - dtype (torch.dtype): - Data type for the causal mask. - device (torch.device): - Device to place the causal mask on. - cache_pos (torch.Tensor): - Cache position indices indicating the position of the input tokens in the sequence. - batch_size (int): - Number of samples in the batch. - - Returns: - torch.Tensor: - A 4D causal mask of shape (batch_size, 1, query_length, key_value_length). - """ - if attention_mask is not None and attention_mask.dim() == 4: - # If the mask is already 4D, return it directly - return attention_mask - - min_value = torch.finfo(dtype).min - - # Create a 2D causal mask of shape (seq_len, target_len) - causal_mask = torch.full( - (seq_len, target_len), fill_value=min_value, dtype=dtype, device=device - ) - - if seq_len != 1: - # Mask positions after the current position - causal_mask = torch.triu(causal_mask, diagonal=1) - - # Adjust causal mask for cache position - causal_mask *= (torch.arange(target_len, device=device) > cache_pos.view(-1, 1)) - - # Expand to 4D and batch size - causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1) - - # Create a padding mask based on the input attention_mask - mask_len = attention_mask.shape[-1] - causal_mask = causal_mask.clone() # Ensure contiguous memory for in-place operations - padding_mask = causal_mask[:, :, :, :mask_len] + attention_mask[:, None, None, :] - padding_mask = padding_mask == 0 - - # Apply padding to the causal mask - causal_mask[:, :, :, :mask_len] = causal_mask[:, :, :, :mask_len].masked_fill( - padding_mask, min_value - ) - - return causal_mask - -def rotate_half(x): - """Rotates half the hidden dims of the input.""" - x1 = x[..., : x.shape[-1] // 2] - x2 = x[..., x.shape[-1] // 2 :] - return torch.cat((-x2, x1), dim=-1) - -class RotaryEmbedding(nn.Module): - """ - Rotary Position Embedding. - - This computes the inverse frequencies according to the original RoPE implementation. - There are other implementations that will be added. 
- Ref: https://github.com/huggingface/transformers/blob/main/src/transformers/modeling_rope_utils.py - """ - - def __init__(self, dim, max_position_embeddings=2048, base=10000, scaling_factor=1.0): - super().__init__() - self.dim = dim - self.max_position_embeddings = max_position_embeddings - self.base = base - self.scaling_factor = scaling_factor - - # Initialize the inverse frequency for RoPE - inv_freq = 1.0 / (self.base ** (torch.arange(0, dim, 2, dtype=torch.int64).float() / dim)) - self.register_buffer("inv_freq", inv_freq, persistent=False) - - @torch.no_grad() - def forward(self, x, position_ids) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Compute the rotary position embeddings (cos, sin) for the given input tensor. - - Args: - x (torch.Tensor): The input tensor of shape (batch_size, seq_len, num_heads, head_dim). - position_ids (torch.Tensor): The position indices for the sequence. - - Returns: - Tuple[torch.Tensor, torch.Tensor]: The cos and sin embeddings. - """ - # Expand inv_freq to match the batch size - inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.size(0), -1, 1) - position_ids_expanded = position_ids[:, None, :].float() - - # Compute cos and sin embeddings - device_type = x.device.type - device_type = device_type if isinstance(device_type, str) and device_type != "mps" else "cpu" - with torch.autocast(device_type=device_type, enabled=False): - freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) - emb = torch.cat((freqs, freqs), dim=-1) - cos = emb.cos() - sin = emb.sin() - - # Apply the scaling factor to cos and sin embeddings - cos = cos * self.scaling_factor - sin = sin * self.scaling_factor + if full_state_dict is not None: + full_state_dict = full_state_dict | state_dict + else: + full_state_dict = state_dict + + # remap to work with our model + remapped_state_dict = {} + paried_embed_weight = None + for key, value in full_state_dict.items(): + # load layer by shard + lnrgx = re.findall(r"model\.layers\.(\d+).*", key) + layer_num = int(lnrgx[0]) if len(lnrgx) > 0 else None + if layer_num in shard_layer_range: + # change input layer norm to sa_norm for torchtune + re_iln = re.findall(rf"model.layers\.{layer_num}\.(input_layernorm)\.weight", key) + if len(re_iln) != 0: + new_key = f"model.layers.{layer_num}.sa_norm.weight" + # print(f"{key} == {new_key}") + remapped_state_dict[new_key] = value + + # change post attention layernorm to mlp_norm for torchtune + re_pal = re.findall(rf"model.layers\.{layer_num}\.(post_attention_layernorm)\.weight", key) + if len(re_pal) != 0: + new_key = f"model.layers.{layer_num}.mlp_norm.weight" + # print(f"{key} == {new_key}") + remapped_state_dict[new_key] = value + + # change self_attn to attn + # along with changing o_proj to output_proj + re_attn = re.findall(rf"model\.layers\.{layer_num}.(\w+)\.(\w+)\.(\w+)", key) + if len(re_attn) != 0 and re_attn[0][0] == "self_attn": + if re_attn[0][1] == "o_proj": + new_key = f"model.layers.{layer_num}.attn.output_proj.weight" + # print(f"{key} == {new_key}") + remapped_state_dict[new_key] = value + else: + new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" + # print(f"{key} == {new_key}") + remapped_state_dict[new_key] = value + + # set mlp weights + re_mlp = re.findall(rf"model\.layers\.{layer_num}.mlp.(\w+)\.(\w+)", key) + if len(re_mlp) != 0: + new_key = f"model.layers.{layer_num}.mlp.{re_mlp[0][0]}.{re_mlp[0][1]}" + # print(f"load mlp {key}") + remapped_state_dict[new_key] = value + + # saving embed for 
paired weights + elif key == "model.embed_tokens.weight": + paried_embed_weight = value + # change name for torchtune + # print("model.embed_tokens.weight == model.tok_embeddings.weight") + remapped_state_dict["model.tok_embeddings.weight"] = value + + elif key == "lm_head.weight": + paried_lmhead = False + + # get everything else except layers, embed_tokens and lm_head + if len(re.findall(r"model\.layers\..*", key)) == 0 and key != "model.embed_tokens.weight" and key != "lm_head.weight": + # print(f"loading other weight: {key}") + remapped_state_dict[key] = value + + if paried_lmhead: + # print(f"model.output.weight: {paried_embed_weight}") + remapped_state_dict["model.output.weight"] = paried_embed_weight + + # print("\nRemapped state dict\n") + # for rsdk in remapped_state_dict.keys(): + # print(f"-- {rsdk}") + + model.load_state_dict(remapped_state_dict, strict=False) + + # if DEBUG >= 7: + # print("\n--- checking weights ----\n") + # print(f"\nremapped_state_dict: {remapped_state_dict.keys()}\n") + # check_weights(model, remapped_state_dict) - return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) class MultiLayerPreceptron(nn.Module): - def __init__( - self, - input_dim, - hidden_dim, - activation='silu', - use_bias=False - ): + def __init__(self, input_dim, hidden_dim, activation="silu", use_bias=False): """ General MLP (Multi-Layer Perceptron) module. @@ -303,24 +193,24 @@ def __init__( hidden_dims (int): Hidden layer/intermediate dimensions. output_dim (int): Dimensionality of the output. activation (str): Activation function ('relu', 'gelu', 'tanh', 'sigmoid', etc.). - dropout (float): Dropout probability. - use_batchnorm (bool): Whether to use batch normalization. + use_bias (bool): Use bias with linearization """ super(MultiLayerPreceptron, self).__init__() # Activation function mapping activations = { - 'relu': nn.ReLU(), - 'gelu': nn.GELU(), - 'tanh': nn.Tanh(), - 'sigmoid': nn.Sigmoid(), - 'leaky_relu': nn.LeakyReLU(0.2), - 'silu': nn.SiLU() + "relu": nn.ReLU(), + "gelu": nn.GELU(), + "tanh": nn.Tanh(), + "sigmoid": nn.Sigmoid(), + "leaky_relu": nn.LeakyReLU(0.2), + "silu": nn.SiLU() } # Ensure valid activation if activation not in activations: - raise ValueError(f"Invalid activation: {activation}. Choose from {list(activations.keys())}") + raise ValueError( + f"Invalid activation: {activation}. Choose from {list(activations.keys())}") # Construct MLP layers self.gate_proj = nn.Linear(input_dim, hidden_dim, bias=use_bias) @@ -329,341 +219,22 @@ def __init__( self.act_fn = activations[activation] def forward(self, x) -> torch.Tensor: - """ - Forward pass for the MLP module. - - Args: - x (torch.Tensor): Input tensor. - - Returns: - torch.Tensor: Output tensor after the MLP transformations. - """ + return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x)) - return self.down_proj( - self.act_fn( - self.gate_proj(x) - ) * self.up_proj(x) - ) class RMSNorm(nn.Module): - def __init__(self, hidden_size, eps=1e-6): - """ - RMSNorm - """ - super().__init__() - self.weight = nn.Parameter(torch.ones(hidden_size)) - self.eps = eps - - def forward(self, hidden_states): - input_dtype = hidden_states.dtype - hidden_states = hidden_states.to(torch.float32) - variance = hidden_states.pow(2).mean(-1, keepdim=True) - hidden_states = hidden_states * torch.rsqrt(variance + self.eps) - return self.weight * hidden_states.to(input_dtype) - -# ------------------ -# Attention Methods -# ------------------ - -class MultiHeadAttention(nn.Module): - """ - Multi-headed attention mechanism. 
- - Using the "attention is all you need" implementation. Other implementations will follow. - Ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L277 - Ref: https://pytorch.org/torchtune/0.3/_modules/torchtune/modules/attention.html - """ - - def __init__( - self, - hidden_size, - num_heads, - num_kv_heads, - head_dim, - rotary_emb, - attention_dropout=0.0, - is_causal=True, - attention_bias=False - ): - super().__init__() - self.hidden_size = hidden_size - self.num_heads = num_heads - self.num_kv_heads = num_kv_heads - self.head_dim = head_dim - self.attention_dropout = attention_dropout - self.is_causal = is_causal - - # nn layers - self.q_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=attention_bias) - self.k_proj = nn.Linear(hidden_size, num_kv_heads * head_dim, bias=attention_bias) - self.v_proj = nn.Linear(hidden_size, num_kv_heads * head_dim, bias=attention_bias) - self.o_proj = nn.Linear(num_heads * head_dim, hidden_size, bias=attention_bias) - self.rotary_emb = rotary_emb - - def forward( - self, - hidden_states: torch.Tensor, - position_ids: torch.Tensor, - attention_mask: Optional[torch.Tensor] = None, - position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - kv_cache: Optional[ttm.KVCache] = None, - cos_sin_unsqueeze: int=1 - ) -> Tuple[torch.Tensor, Optional[ttm.KVCache]]: - batch_size, seq_len, _ = hidden_states.size() - - # Project to queries, keys, and values - query_states = self.q_proj(hidden_states) - key_states = self.k_proj(hidden_states) - value_states = self.v_proj(hidden_states) - print(f"query_states: {query_states.shape}") - print(f"key_states: {key_states.shape}") - print(f"value_states: {value_states.shape}") - - # Reshape to [batch_size, num_heads, seq_len, head_dim] - query_states = query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) - key_states = key_states.view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) - value_states = value_states.view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) - print(f"query_states: {query_states.shape}") - print(f"key_states: {key_states.shape}") - print(f"value_states: {value_states.shape}") - - # Apply rotary positional embeddings if position_ids are provided - # or use position_embeddings - if position_embeddings is not None: - cos, sin = position_embeddings - else: - cos, sin = self.rotary_emb(value_states, position_ids) - - print(f"cos: {cos.shape} | sin: {sin.shape}") - # Expand cos and sin to match hidden_states' shape - cos = cos.unsqueeze(cos_sin_unsqueeze) - sin = sin.unsqueeze(cos_sin_unsqueeze) - print(f"cos: {cos.shape} | sin: {sin.shape}") - - # Apply rotary embeddings to queries and keys - query_states = (query_states * cos) + (rotate_half(query_states) * sin) - key_states = (key_states * cos) + (rotate_half(key_states) * sin) - print(f"query_states: {query_states.shape}") - print(f"key_states: {key_states.shape}") - print(f"value_states: {value_states.shape}") - - # Forcing caching always enabled - if kv_cache is not None: - #print(f"kv_cache.size {kv_cache.size}") - - #print(f"key_states.size(2) {key_states.size(2)}") - - #if kv_cache.size != key_states.size(2): - # print(f"\n MAKE NEW KVCACHE batch_size={key_states.size(0)} max_seq_len={key_states.size(2)}") - # kv_cache = ttm.KVCache( - # batch_size=key_states.size(0), - # max_seq_len=key_states.size(2), - # num_heads=self.num_kv_heads, - # head_dim=self.head_dim, - # dtype=hidden_states.dtype 
- # ) - - key_states, value_states = kv_cache.update(key_states, value_states) - print(f"kv_cache: {kv_cache.size}") - print(f"key_states: {key_states.shape}") - print(f"value_states: {value_states.shape}") - - # Repeat keys and values if needed - #if self.num_heads > self.num_kv_heads: - n_rep = self.num_heads // self.num_kv_heads - key_states = torch.repeat_interleave(key_states, n_rep, dim=1) - value_states = torch.repeat_interleave(value_states, n_rep, dim=1) - - print(f"query_states: {query_states.shape}") - print(f"key_states: {key_states.shape}") - print(f"value_states: {value_states.shape}") - - # Compute attention scores - attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim) - print(f"attn_weights: {attn_weights.shape}") - - # Apply attention mask, if provided - if attention_mask is not None: - print(f"attention_mask: {attention_mask.shape}") - causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] - print(f"causal_mask: {causal_mask.shape}") - attn_weights = attn_weights + causal_mask - print(f"attn_weights: {attn_weights.shape}") - - # Softmax normalization - attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) - attn_weights = F.dropout(attn_weights, p=self.attention_dropout, training=self.training) - print(f"attn_weights: {attn_weights.shape}") - - # Compute attention output - attn_output = torch.matmul(attn_weights, value_states) - print(f"attn_output: {attn_output.shape}") - - # Transpose attention output - attn_output = attn_output.transpose(1,2).contiguous() - print(f"attn_output: {attn_output.shape}") - - # Reshape [batch_size, seq_len, -1] - attn_output = attn_output.reshape(batch_size, seq_len, -1) - print(f"attn_output after transpose: {attn_output.shape}") - - # Project back to hidden size - attn_output = self.o_proj(attn_output) - print(f"attn_output: {attn_output.shape}") - - return attn_output, kv_cache - -class SDPAttention(nn.Module): - """ - Scaled dot product attention mechanism. 
- - Using the scaled dot product attention method from pytorch - Ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L524 - """ - - def __init__( - self, - hidden_size, - num_heads, - num_kv_heads, - head_dim, - rotary_emb, - attention_dropout=0.0, - is_causal=True, - attention_bias=False, - kv_max_seq_len=2048 - ): + def __init__(self, hidden_size, eps=1e-6): + """ + RMSNorm + designed for llama model but used for other models + """ super().__init__() - self.hidden_size = hidden_size - self.num_heads = num_heads - self.num_kv_heads = num_kv_heads - self.head_dim = head_dim - self.attention_dropout = attention_dropout - self.is_causal = is_causal - self.kv_max_seq_len = kv_max_seq_len - - # nn layers - self.q_proj = nn.Linear(hidden_size, num_heads * head_dim, bias=attention_bias) - self.k_proj = nn.Linear(hidden_size, num_kv_heads * head_dim, bias=attention_bias) - self.v_proj = nn.Linear(hidden_size, num_kv_heads * head_dim, bias=attention_bias) - self.o_proj = nn.Linear(num_heads * head_dim, hidden_size, bias=attention_bias) - self.rotary_emb = rotary_emb - - def forward( - self, - hidden_states: torch.Tensor, - position_ids: torch.Tensor, - attention_mask: Optional[torch.Tensor] = None, - position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - kv_cache: Optional[ttm.KVCache] = None, - cos_sin_unsqueeze: int=1 - ) -> Tuple[torch.Tensor, Optional[ttm.KVCache]]: - batch_size, seq_len, _ = hidden_states.size() - - # Project to queries, keys, and values - query_states = self.q_proj(hidden_states) - key_states = self.k_proj(hidden_states) - value_states = self.v_proj(hidden_states) - print(f"query_states: {query_states.shape}") - print(f"key_states: {key_states.shape}") - print(f"value_states: {value_states.shape}") - - # Reshape to [batch_size, num_heads, seq_len, head_dim] - query_states = query_states.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) - key_states = key_states.view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) - value_states = value_states.view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) - print(f"query_states: {query_states.shape}") - print(f"key_states: {key_states.shape}") - print(f"value_states: {value_states.shape}") - - # Apply rotary positional embeddings if position_ids are provided - # or use position_embeddings - if position_embeddings is not None: - cos, sin = position_embeddings - else: - cos, sin = self.rotary_emb(value_states, position_ids) - - print(f"cos: {cos.shape} | sin: {sin.shape}") - # Expand cos and sin to match hidden_states' shape - cos = cos.unsqueeze(cos_sin_unsqueeze) - sin = sin.unsqueeze(cos_sin_unsqueeze) - print(f"cos: {cos.shape} | sin: {sin.shape}") - - # Apply rotary embeddings to queries and keys - query_states = (query_states * cos) + (rotate_half(query_states) * sin) - key_states = (key_states * cos) + (rotate_half(key_states) * sin) - print(f"query_states: {query_states.shape}") - print(f"key_states: {key_states.shape}") - print(f"value_states: {value_states.shape}") - - # Caching - if kv_cache is not None: - if kv_cache.size >= self.max_seq_len: - # double the cache each time space is ran out - self.kv_max_seq_len = self.kv_max_seq_len + self.kv_max_seq_len - - kv_cache = ttm.KVCache( - batch_size=key_states.size(0), - max_seq_len=self.kv_max_seq_len, - num_heads=self.num_kv_heads, - head_dim=self.head_dim, - dtype=hidden_states.dtype - ) - - key_states, value_states = 
kv_cache.update(key_states, value_states) - - # **Slice KVCache to match `query_states` length** - key_states = key_states[:, :, :seq_len, :] - value_states = value_states[:, :, :seq_len, :] - - # kv_cache.update(key_states, value_states) - print(f"kv_cache: {kv_cache.size}") - print(f"from kv_cache / key_states: {key_states.shape}") - print(f"from kv_cache / value_states: {value_states.shape}") - - # Repeat keys and values if needed - #if self.num_heads > self.num_kv_heads: - n_rep = self.num_heads // self.num_kv_heads - key_states = torch.repeat_interleave(key_states, n_rep, dim=1) - value_states = torch.repeat_interleave(value_states, n_rep, dim=1) - - print(f"query_states: {query_states.shape}") - print(f"key_states: {key_states.shape}") - print(f"value_states: {value_states.shape}") - - causal_mask = attention_mask - if causal_mask is not None: - causal_mask = causal_mask[:, :, :, : key_states.shape[-2]] - print(f"causal_mask: {causal_mask.shape}") - - if query_states.device.type == "cuda" and causal_mask is not None: - query_states = query_states.contiguous() - key_states = key_states.contiguous() - value_states = value_states.contiguous() - - print(f"query_states: {query_states.shape}") - print(f"key_states: {key_states.shape}") - print(f"value_states: {value_states.shape}") - - is_causal = True if causal_mask is None and seq_len > 1 else False - - attn_output = F.scaled_dot_product_attention( - query_states, - key_states, - value_states, - attn_mask=causal_mask, - dropout_p=0.0, - is_causal=is_causal, - ) - - print(f"attn_output: {attn_output.shape}") - - attn_output = attn_output.transpose(1, 2).contiguous() - attn_output = attn_output.view(batch_size, seq_len, -1) - - attn_output = self.o_proj(attn_output) - - print(f"attn_output: {attn_output.shape}") - - return attn_output, kv_cache - + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.eps = eps + + def forward(self, hidden_states): + input_dtype = hidden_states.dtype + hidden_states = hidden_states.to(torch.float32) + variance = hidden_states.pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.eps) + return self.weight * hidden_states.to(input_dtype) \ No newline at end of file diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py new file mode 100644 index 000000000..7b8e7bba3 --- /dev/null +++ b/exo/inference/torch/pt_inference.py @@ -0,0 +1,5 @@ +""" +TorchDynamicShardInferenceEngine +Sharded inference engine using PyTorch based torchtune models +""" + diff --git a/exo/inference/torch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_inference_engine.py index 2b72b8592..c7230c894 100644 --- a/exo/inference/torch/tests/test_inference_engine.py +++ b/exo/inference/torch/tests/test_inference_engine.py @@ -5,7 +5,7 @@ import asyncio from exo.inference.shard import Shard -from exo.inference.torch.inference import TorchDynamicShardInferenceEngine +from exo.inference.torch.hf_inference import HFDynamicShardInferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.inference_engine import InferenceEngine @@ -119,8 +119,8 @@ async def test_inference_engine( try: print("\n\n -------- TEST Qwen/Qwen2-0.5B-Instruct -------- \n\n") asyncio.run(test_inference_engine( - TorchDynamicShardInferenceEngine(HFShardDownloader()), - TorchDynamicShardInferenceEngine(HFShardDownloader()), + HFDynamicShardInferenceEngine(HFShardDownloader()), + HFDynamicShardInferenceEngine(HFShardDownloader()), 
"Qwen/Qwen2-0.5B-Instruct", 36 )) diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py new file mode 100644 index 000000000..a981db776 --- /dev/null +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -0,0 +1,121 @@ +""" +Test of pytorch based llama3 models +full layer run +""" + +from pathlib import Path +import torch +from huggingface_hub import snapshot_download + +import torchtune.generation as ttg +from torchtune.models import llama3 +from torchtune.data import Message + + +from exo.inference.torch.models.llama3 import ShardedLlamaModel +from exo.inference.shard import Shard + +from exo.inference.torch.models.llm_utils import ( + load_model_config, + load_model_weights_torchtune, +) + + +MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" +TEMP = 0.6 +TOP_K = 300 +MAX_GEN_TOKENS = 50 + +def main(model, prompt: str, device: torch.device=torch.device("cpu")): + # Tokenize input text + messages = [] + messages.extend([ + Message(role="system", content="You are a helpful and creative AI assistant."), + Message(role="user", content=prompt), + # Empty assistant message to kick-start generation + Message(role="assistant", content=""), + ]) + + tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) + print(f"tokenizer_out: {tokenizer_out}") + tokens = torch.tensor(tokenizer_out["tokens"], dtype=torch.int, device=device) + + _, logits = model.generate(tokens=tokens) + + tokens = ttg.sample(logits=logits[:, -1].clone(), temperature=TEMP, top_k=TOP_K) + + print(f"tokens: {tokens}") + + generated_tokens = tokens.clone().tolist() + print(f"generated_tokens: {generated_tokens}") + print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens[0])}\n\n\n") + + +def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu")): + # Tokenize input text + messages = [] + messages.extend([ + Message(role="system", content="You are a helpful and creative AI assistant."), + Message(role="user", content=user_prompt), + # Empty assistant message to kick-start generation + Message(role="assistant", content=""), + ]) + + tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) + prompt = torch.tensor(tokenizer_out["tokens"], dtype=torch.int, device=device) + print(f"tokens prompt: {prompt}") + print(f"pad_id: {llama_tokenizer.pad_id}") + + generated_tokens, _ = ttg.generate( + model=model.model, + prompt=prompt, + max_generated_tokens=MAX_GEN_TOKENS, + pad_id=llama_tokenizer.pad_id, + temperature=TEMP, + top_k=TOP_K, + stop_tokens=llama_tokenizer.stop_tokens, + ) + generated_tokens = generated_tokens[:, -MAX_GEN_TOKENS:].tolist() + + print(f"generated_tokens: {generated_tokens}") + + print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens[0])}\n\n\n") + + +if __name__ == "__main__": + # prompt = "hello" + prompt = "What is the capital of france?" 
+ + # Get the path to the model files from the Hugging Face cache + cache_dir = Path(snapshot_download(MODEL_NAME)) + print(f"Cache directory: {cache_dir}") + + # Load model configuration + config = load_model_config(cache_dir / "config.json") + + print(f"current config\n{config}") + + # Setup shard + n_layers = int(config["num_layers"]) + shard_1 = Shard( + model_id=MODEL_NAME, + start_layer=0, + end_layer=n_layers-1, + n_layers=n_layers, + ) + + # Initialize tokenizer + llama_tokenizer_path = f"{cache_dir}/original/tokenizer.model" + llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) + print(llama_tokenizer.stop_tokens) + + # Initialize LlamaModel with config and tokenizer + # device = torch.device("cuda") + device = None + shard_model_1 = ShardedLlamaModel(config, shard_1, llama_tokenizer, device=device) + print(f"\nshard_model_1: {shard_model_1}") + + load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) + + # main(shard_model_1, prompt, device) + normal_full(shard_model_1, prompt, device) diff --git a/exo/inference/torch/tests/test_llama3_model.py b/exo/inference/torch/tests/test_llama3_model.py deleted file mode 100644 index 1b16d91f0..000000000 --- a/exo/inference/torch/tests/test_llama3_model.py +++ /dev/null @@ -1,265 +0,0 @@ -""" -Test of pytorch based llama3 model -""" -from pathlib import Path -import gc -import time -import torch -from transformers import AutoTokenizer -from huggingface_hub import snapshot_download - -import torchtune.generation as ttg -from torchtune.models import llama3 -from torchtune.data import Message - - -from exo.inference.torch.models.llama3 import ShardedLlamaModel -from exo.inference.shard import Shard - -from exo.inference.torch.models.llm_utils import ( - load_model_config, - load_model_weights_torchtune, -) - - -MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" -TEMP=0.6 -TOP_K=35 -TOP_P=0.9 -MAX_SEQ_LEN=2048 - -def test_generation_1(shard_model, text): - """ - Test the generation capabilities of the LlamaModel with sample text. 
- """ - # Tokenize input text - messages = [] - messages.extend( - [ - Message(role="user", content=text), - # Empty assistant message to kick-start generation - Message(role="assistant", content=""), - ] - ) - - tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) - print(f"tokenizer_out: {tokenizer_out}") - tokens = tokenizer_out["tokens"] - prompt = torch.tensor(tokens, dtype=torch.int) - - hidden_states, logits = shard_model.generate(prompt) - - if hidden_states is not None: - print(f"hidden_states[{len(hidden_states)}]: {hidden_states}") - - if logits is not None: - print(f"logits: {logits.shape}\n{logits}") - - return hidden_states, logits, prompt - #if prompt.ndim == 1: - # prompt = prompt.view(1, -1) - - #bsz, prompt_length = prompt.size() - #total_response_length = prompt_length + MAX_SEQ_LEN - #generated_tokens = prompt.clone() - #resp_max_seq_len = ( - # total_response_length - # if not shard_model.model.caches_are_enabled() - # else shard_model.model.decoder_max_cache_seq_len - #) - - ## masking for proper attention - #padding_masks = prompt != llama_tokenizer.pad_id - #if not padding_masks.all(): - # padding_masks = torch.nn.functional.pad( - # padding_masks, - # (0, MAX_SEQ_LEN), - # value=True - # ) - - # masks = ttg.get_causal_mask_from_padding_mask( - # padding_masks, - # target_seq_len=resp_max_seq_len - # ) - - # input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) - #else: - # masks = torch.tril( - # torch.ones( - # total_response_length, - # resp_max_seq_len if resp_max_seq_len is not None else MAX_SEQ_LEN, - # dtype=torch.bool, - # device=prompt.device, - # ) - # ).unsqueeze(0) - - # input_pos = torch.arange( - # 0, total_response_length, device=prompt.device - # ).unsqueeze(0) - - #if shard_model.model.caches_are_enabled(): - # curr_masks = masks[:, :prompt_length] - #else: - # curr_masks = masks[:, :prompt_length, :prompt_length] - - #rand_sample = torch.empty( - # ( - # prompt.size(0), - # self.model.tok_embeddings.num_embeddings - # ), device=prompt.device - #).exponential_(1, generator=None) - - #print(f"padding_masks: {padding_masks.shape}") - #print(padding_masks.all()) - - ## this can be sepearted out for dist inference - ## see https://github.com/pytorch/torchtune/blob/bc4acc19ffab2366a14468c97294992dbb7c50d1/torchtune/generation/_generation.py#L66 - #next_token, gen_logits = ttg.generate_next_token( - # shard_model.model, - # input_pos=input_pos[:, :prompt_length].squeeze(), - # x=prompt, - # mask=curr_masks, - # q=rand_sample - #) - - #print(f"next_token: {next_token}") - - #generated_tokens = torch.cat([generated_tokens, next_token], dim=-1) - - #print(f"generated_tokens: {generated_tokens}") - - #curr_pos = prompt_length - - ## stop tokens logic - #stop_tokens = None - #stop_token_reached = torch.zeros(bsz, dtype=torch.bool, device=prompt.device) - #stop_tokens = ( - # torch.tensor(stop_tokens, device=prompt.device, dtype=tokens.dtype) - # if stop_tokens - # else None - #) - #stop_token_mask = torch.ones( - # (bsz, prompt_length + 1), dtype=torch.int32, device=prompt.device - #) - - ## finish writing stop token logic using torchtune generation - ## ref https://github.com/pytorch/torchtune/blob/main/torchtune/generation/_generation.py#L337 - - #for _ in range(max_length): - - # if shard_model.model.caches_are_enabled(): - # curr_input_pos = input_pos[:, curr_pos] - # curr_masks = masks[:, curr_pos, None, :] - # else: - # tokens = generated_tokens.clone() - # curr_input_pos = input_pos[:, : curr_pos + 1] - # curr_masks = 
masks[:, : curr_pos + 1, : curr_pos + 1] - - #generated_tokens = generated_tokens.tolist() - #print(f"resp: {llama_tokenizer.decode(generated_tokens[0])}") - -def test_generation_2(shard_model, tokens, hidden_state): - print("Generate with the rest of layers") - hidden_states, logits = shard_model.generate( - tokens=tokens, - hidden_state=hidden_state - ) - - if hidden_states is not None: - print(f"hidden_states {hidden_states.shape}: {hidden_states}") - - if logits is not None: - print(f"logits: {logits.shape}\n{logits}") - - rand_sample = torch.empty( - ( - logits.size(0), - shard_model.model.tok_embeddings.num_embeddings - ), device=logits.device - ).exponential_(1, generator=None) - - logit = ttg.sample( - logits=logits[:, -1].clone(), - temperature=TEMP, - top_k=TOP_K, - q=rand_sample - ) - - print(f"logit: {logit}") - - generated_tokens = tokens.clone() - generated_tokens = torch.cat([generated_tokens, logit.squeeze(-1)], dim=-1).tolist() - - print(f"generated_tokens: {generated_tokens}") - - print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(logit.squeeze(-1).tolist())}\n\n\n") - - return hidden_states, logits - -if __name__ == "__main__": - print("\nTesting generation:") - # Get the path to the model files from the Hugging Face cache - cache_dir = Path(snapshot_download(MODEL_NAME)) - print(f"Cache directory: {cache_dir}") - - # Load model configuration - config = load_model_config(cache_dir / "config.json") - - print(f"current config\n{config}") - - # Setup shard - s1_end = int(int(config["num_hidden_layers"])/2) - shard_1 = Shard( - model_id=MODEL_NAME, - start_layer=0, - end_layer=s1_end, - n_layers=int(config["num_hidden_layers"]) - ) - - s2_start = s1_end + 1 - s2_end = shard_1.n_layers - 1 - shard_2 = Shard( - model_id=MODEL_NAME, - start_layer=s2_start, - end_layer=s2_end, - n_layers=int(config["num_hidden_layers"]) - ) - - # Initialize tokenizer - llama_tokenizer_path = f"{cache_dir}/original/tokenizer.model" - llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) - #tokenizer = AutoTokenizer.from_pretrained( - # MODEL_NAME, - # add_eos_token=True - #) - - # Initialize LlamaModel with config and tokenizer - shard_model_1 = ShardedLlamaModel(config, shard_1, llama_tokenizer, use_cache=True) - print(f"\nshard_model_1: {shard_model_1}") - load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) - - # Sample text for testing - #prompt = "First letter in the word 'Red'" - prompt = "GM, say it back" - shard_1_hs, shard_1_logits, shard_1_tokens = test_generation_1(shard_model_1, prompt) - - print(f"shard_1_hs:\n{shard_1_hs}") - print(f"shard_1_logits:\n{shard_1_logits}") - print(f"shard_1_tokens:\n{shard_1_tokens}") - - gc.collect() - torch.cuda.empty_cache() - - if shard_model_1.model.caches_are_enabled(): - shard_model_1.model.reset_caches() - - del shard_model_1.model - del shard_model_1 - - #time.sleep(10) - - shard_model_2 = ShardedLlamaModel(config, shard_2, llama_tokenizer, use_cache=True) - print(f"\nshard_model_2: {shard_model_2}") - load_model_weights_torchtune(cache_dir, shard_2, shard_model_2) - shard_2_hs, shard_2_logits = test_generation_2(shard_model_2, shard_1_tokens, shard_1_hs) - diff --git a/exo/inference/torch/tests/test_llama3_split.py b/exo/inference/torch/tests/test_llama3_split.py new file mode 100644 index 000000000..7bc0fe7c9 --- /dev/null +++ b/exo/inference/torch/tests/test_llama3_split.py @@ -0,0 +1,131 @@ +""" +Test of pytorch based llama3 model +""" + +from pathlib import Path +import torch +from huggingface_hub import 
snapshot_download + +import torchtune.generation as ttg +from torchtune.models import llama3 +from torchtune.data import Message + + +from exo.inference.torch.models.llama3 import ShardedLlamaModel +from exo.inference.shard import Shard + +from exo.inference.torch.models.llm_utils import ( + load_model_config, + load_model_weights_torchtune, +) + + +MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" +TEMP = 0.6 +TOP_K = 300 + +def test_generation_1(shard_model, prompt): + """ + Test the generation capabilities of the LlamaModel with sample text. + """ + # Tokenize input text + messages = [] + messages.extend([ + Message(role="system", content="You are a helpful and creative AI assistant."), + Message(role="user", content=prompt), + # Empty assistant message to kick-start generation + Message(role="assistant", content=""), + ]) + + print(f"last?: {shard_model.shard.is_last_layer()}") + tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) + print(f"tokenizer_out: {tokenizer_out}") + tokens = torch.tensor(tokenizer_out["tokens"], dtype=torch.int) + + hidden_states, _ = shard_model.generate(tokens) + + if hidden_states is not None: + print(f"hidden_states[{len(hidden_states)}]: {hidden_states}") + + return hidden_states, tokens + + +def test_generation_2(shard_model, in_tokens, hidden_state): + print("Generate with the rest of layers") + hidden_states, logits = shard_model.generate( + tokens=in_tokens, + hidden_state=hidden_state + ) + + if hidden_states is not None: + print(f"hidden_states {hidden_states.shape}: {hidden_states}") + + if logits is not None: + print(f"logits: {logits.shape}\n{logits}") + + # rand_sample = torch.empty(( + # logits.size(0), + # shard_model.model.tok_embeddings.num_embeddings + # ), + # device=logits.device + # ).exponential_(1, generator=None) + + tokens = ttg.sample( + logits=logits[:, -1].clone(), + temperature=TEMP, + top_k=TOP_K, + # q=rand_sample + ) + + print(f"tokens: {tokens}") + + generated_tokens = tokens.clone() + generated_tokens = generated_tokens.tolist() + + print(f"generated_tokens: {generated_tokens}") + + print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens[0])}\n\n\n") + + +if __name__ == "__main__": + print("\nTesting generation:") + + prompt = "What is the capital of france? Say it in one word and nothing else, please." 
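  # The steps below split the Llama-3.2-1B decoder (16 layers) roughly in half:
  # shard_1 covers layers 0 through n_layers // 2 and, since it is not the last
  # shard, generate() returns only its hidden state; shard_2 covers the
  # remaining layers, consumes that hidden state, and returns the logits that
  # ttg.sample() turns into the next token.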
+ + # Get the path to the model files from the Hugging Face cache + cache_dir = Path(snapshot_download(MODEL_NAME)) + + # Load model configuration + config = load_model_config(cache_dir / "config.json") + + # Setup shard + n_layers = int(config["num_layers"]) + s1_end = int(n_layers/2) + shard_1 = Shard( + model_id=MODEL_NAME, + start_layer=0, + end_layer=s1_end, + n_layers=n_layers + ) + + shard_2 = Shard( + model_id=MODEL_NAME, + start_layer=s1_end + 1, + end_layer=n_layers - 1, + n_layers=n_layers + ) + + # Initialize tokenizer + llama_tokenizer_path = f"{cache_dir}/original/tokenizer.model" + llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) + + # Initialize LlamaModel with config and tokenizer + shard_model_1 = ShardedLlamaModel(config, shard_1, llama_tokenizer) + print(f"\nshard_model_1: {shard_model_1}") + load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) + shard_1_hs, shard_1_tokens = test_generation_1(shard_model_1, prompt) + + shard_model_2 = ShardedLlamaModel(config, shard_2, llama_tokenizer) + print(f"\nshard_model_2: {shard_model_2}") + load_model_weights_torchtune(cache_dir, shard_2, shard_model_2) + test_generation_2(shard_model_2, shard_1_tokens, shard_1_hs) From d958bf98d4a18556e8c330a31f21fb547ed20fef Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 17 Nov 2024 06:09:15 -0900 Subject: [PATCH 486/589] split model working, updates to safetensor loading letting shard control --- exo/inference/torch/models/llama3.py | 2 +- exo/inference/torch/models/llm_utils.py | 10 ++++------ exo/inference/torch/tests/test_llama3_full.py | 3 +-- exo/inference/torch/tests/test_llama3_split.py | 4 ++-- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index cb3456ebb..dfa0aad95 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -230,7 +230,7 @@ def __init__( self.device = device if device is not None else torch.device("cpu") self.use_cache = self.config.get("use_cache", False) self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) - self.max_seq_len = max_seq_len if max_seq_len is not None else 4096 + self.max_seq_len = self.config["max_seq_len"] def generate(self, tokens: torch.Tensor, hidden_state: Optional[torch.Tensor] = None) -> Tuple[Optional[List[torch.Tensor]], Optional[torch.Tensor]]: """ diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index f1a60e100..68c9ec8a4 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -76,7 +76,7 @@ def check_weights(model, state_dict): model_state_dict = model.state_dict() for name, param in model_state_dict.items(): if name in state_dict: - print(f"\nchecking {name}\n") + # print(f"\nchecking {name}\n") loaded_param = state_dict[name] if param.shape != loaded_param.shape: print(f"Shape mismatch for {name}: expected {param.shape}, got {loaded_param.shape}") @@ -115,9 +115,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): paried_embed_weight = None for key, value in full_state_dict.items(): # load layer by shard - lnrgx = re.findall(r"model\.layers\.(\d+).*", key) - layer_num = int(lnrgx[0]) if len(lnrgx) > 0 else None - if layer_num in shard_layer_range: + for layer_num in range(shard.start_layer, shard.end_layer + 1): # change input layer norm to sa_norm for torchtune re_iln = 
re.findall(rf"model.layers\.{layer_num}\.(input_layernorm)\.weight", key) if len(re_iln) != 0: @@ -153,7 +151,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): remapped_state_dict[new_key] = value # saving embed for paired weights - elif key == "model.embed_tokens.weight": + if key == "model.embed_tokens.weight": paried_embed_weight = value # change name for torchtune # print("model.embed_tokens.weight == model.tok_embeddings.weight") @@ -180,7 +178,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # if DEBUG >= 7: # print("\n--- checking weights ----\n") # print(f"\nremapped_state_dict: {remapped_state_dict.keys()}\n") - # check_weights(model, remapped_state_dict) + check_weights(model, remapped_state_dict) class MultiLayerPreceptron(nn.Module): diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index a981db776..dabdae219 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -23,7 +23,7 @@ MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" TEMP = 0.6 -TOP_K = 300 +TOP_K = 25 MAX_GEN_TOKENS = 50 def main(model, prompt: str, device: torch.device=torch.device("cpu")): @@ -107,7 +107,6 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" # Initialize tokenizer llama_tokenizer_path = f"{cache_dir}/original/tokenizer.model" llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) - print(llama_tokenizer.stop_tokens) # Initialize LlamaModel with config and tokenizer # device = torch.device("cuda") diff --git a/exo/inference/torch/tests/test_llama3_split.py b/exo/inference/torch/tests/test_llama3_split.py index 7bc0fe7c9..0d5f69df2 100644 --- a/exo/inference/torch/tests/test_llama3_split.py +++ b/exo/inference/torch/tests/test_llama3_split.py @@ -22,7 +22,7 @@ MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" TEMP = 0.6 -TOP_K = 300 +TOP_K = 25 def test_generation_1(shard_model, prompt): """ @@ -90,7 +90,7 @@ def test_generation_2(shard_model, in_tokens, hidden_state): if __name__ == "__main__": print("\nTesting generation:") - prompt = "What is the capital of france? Say it in one word and nothing else, please." 
+ prompt = "Say 'Hello'" # Get the path to the model files from the Hugging Face cache cache_dir = Path(snapshot_download(MODEL_NAME)) From c8bdb0971c4b8836715903af1b86704ce4bd3c6b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 17 Nov 2024 08:31:14 -0900 Subject: [PATCH 487/589] reduced model loading ram by loading only some layers in layer list, inference is still very high --- exo/inference/torch/models/llama3.py | 137 ++++++++++++------ exo/inference/torch/models/llm_utils.py | 13 +- exo/inference/torch/tests/test_llama3_full.py | 2 + .../torch/tests/test_llama3_split.py | 41 +++--- 4 files changed, 124 insertions(+), 69 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index dfa0aad95..9d2b96073 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -14,11 +14,7 @@ from torchtune.modules.attention_utils import _MaskType from exo.inference.shard import Shard -from exo.inference.torch.models.llm_utils import ( - MultiLayerPreceptron, - RMSNorm, - get_torch_dtype -) +from exo.inference.torch.models.llm_utils import MultiLayerPreceptron, RMSNorm, get_torch_dtype class ShardTransformerDecoder(ttm.TransformerDecoder): @@ -27,6 +23,7 @@ class ShardTransformerDecoder(ttm.TransformerDecoder): Custom version of torchtune TransformerDecoder to allow for sharding of models and passing of hidden layers between shards """ + def __init__( self, *, @@ -55,6 +52,44 @@ def __init__( self.shard = shard + def setup_caches( + self, + batch_size: int, + dtype: torch.dtype, + *, + encoder_max_seq_len: Optional[int] = None, + decoder_max_seq_len: Optional[int] = None, + ): + """ + modified version for shard + + assume just decoder layers + """ + if decoder_max_seq_len is not None: + self.decoder_max_cache_seq_len = decoder_max_seq_len + else: + self.decoder_max_cache_seq_len = self.max_seq_len + + for layer in self.layers: + if layer is not None: + layer.setup_caches( + batch_size, + dtype, + encoder_max_seq_len=self.encoder_max_cache_seq_len, + decoder_max_seq_len=self.decoder_max_cache_seq_len, + ) + + def caches_are_enabled(self) -> bool: + """ + modified version for shard + """ + if self.layers[0] is not None: + return self.layers[0].caches_are_enabled() + else: + for layer in self.layers: + if layer is not None: + return layer.caches_are_enabled() + def forward( self, tokens: torch.Tensor, @@ -90,18 +125,19 @@ def forward( print(f"\nhidden layer in H[{i}]\n{h}\nmask\n{mask}\ninput_pos\n{input_pos}\n{self.output_hidden_states}\n") # Process through each transformer layer - h = layer( - h, - mask=mask, - encoder_input=encoder_input, - encoder_mask=encoder_mask, - input_pos=input_pos, - ) + with torch.no_grad(): + h = layer( + h, + mask=mask, + encoder_input=encoder_input, + encoder_mask=encoder_mask, + input_pos=input_pos, + ) - if i in self.output_hidden_states: - hidden.append(h) + if i in self.output_hidden_states: + hidden.append(h) - print(f"\nhidden layer out H[{i}]->H[{i + 1}]\n{h}\n") + print(f"\nhidden layer out H[{i}]->H[{i + 1}]\n{h}\n") # Apply normalization h = self.norm(h) @@ -117,6 +153,7 @@ def forward( print(f"\n\noutput {output}\n\n") return output + def LlamaModel(config: dict, shard: Shard): """ LlamaModel using torchtune @@ -132,8 +169,10 @@ def LlamaModel(config: dict, shard: Shard): scale_factor=scale_factor, ) - layers = [] - for _ in range(shard.n_layers): + # hack to align sharded weights with layers + # fill unused layer positions with None + layers = [None for _ in 
range(shard.n_layers)] + for i in range(shard.start_layer, shard.end_layer + 1): self_attn = ttm.MultiHeadAttention( embed_dim=config["embed_dim"], num_heads=config["num_heads"], @@ -164,11 +203,7 @@ def LlamaModel(config: dict, shard: Shard): pos_embeddings=rope, ) - mlp = MultiLayerPreceptron( - config["embed_dim"], - config["intermediate_dim"], - config["hidden_act"] - ) + mlp = MultiLayerPreceptron(config["embed_dim"], config["intermediate_dim"], config["hidden_act"]) layer = ttm.TransformerSelfAttentionLayer( attn=self_attn, @@ -177,16 +212,18 @@ def LlamaModel(config: dict, shard: Shard): mlp_norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), ) - layers.append(layer) - + layers[i] = layer + + for i in range(len(layers)): + print(f"layers[{i}]: {layers[i]}") layers = nn.ModuleList(layers) tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) - # output_proj = ttm.TiedLinear(tok_embeddings) - output_proj = nn.Linear( - config["embed_dim"], - config["vocab_size"], - bias=config["attn_bias"], - ) + output_proj = ttm.TiedLinear(tok_embeddings) + # output_proj = nn.Linear( + # config["embed_dim"], + # config["vocab_size"], + # bias=config["attn_bias"], + # ) return ShardTransformerDecoder( tok_embeddings=tok_embeddings, @@ -197,7 +234,7 @@ def LlamaModel(config: dict, shard: Shard): head_dim=config["head_dim"], norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), output=output_proj, - num_layers=config["num_layers"] + num_layers=config["num_layers"], ) # return ttm.TransformerDecoder( @@ -214,13 +251,14 @@ def LlamaModel(config: dict, shard: Shard): class ShardedLlamaModel(nn.Module): def __init__( - self, - config: dict, - shard: Shard, - tokenizer: Any, - device: Optional[torch.device] = None, - max_seq_len: Optional[int] = None - ): + self, + config: dict, + shard: Shard, + tokenizer: Any, + device: Optional[torch.device] = None, + max_new_tokens: Optional[int] = 10, + use_cache: Optional[bool] = False + ): super(ShardedLlamaModel, self).__init__() self.tokenizer = tokenizer @@ -228,11 +266,19 @@ def __init__( self.config = config self.dtype = get_torch_dtype(self.config["torch_dtype"]) if "torch_dtype" in self.config else torch.float self.device = device if device is not None else torch.device("cpu") - self.use_cache = self.config.get("use_cache", False) - self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) + self.use_cache = self.config.get("use_cache", False) if not use_cache else use_cache + + + self.max_new_tokens = max_new_tokens self.max_seq_len = self.config["max_seq_len"] - def generate(self, tokens: torch.Tensor, hidden_state: Optional[torch.Tensor] = None) -> Tuple[Optional[List[torch.Tensor]], Optional[torch.Tensor]]: + self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) + + def generate( + self, + tokens: torch.Tensor, + hidden_state: Optional[torch.Tensor] = None + ) -> Tuple[Optional[List[torch.Tensor]], Optional[torch.Tensor]]: """ Generate logits and/or hidden_states from llama model @@ -241,6 +287,7 @@ def generate(self, tokens: torch.Tensor, hidden_state: Optional[torch.Tensor] = hidden_state (torch.Tensor, optional) - hidden state from last activated hidden layer, if any max_seq_len (int) - Max sequence length of generation, default 4096 """ + print(self.shard) print(self.shard.is_last_layer()) @@ -252,7 +299,11 @@ def generate(self, tokens: torch.Tensor, hidden_state: Optional[torch.Tensor] = # setup cache if not self.model.caches_are_enabled() and self.use_cache: with 
self.device: - self.model.setup_caches(bsz, self.dtype, decoder_max_seq_len=self.model.decoder_max_cache_seq_len) + self.model.setup_caches( + bsz, + self.dtype, + decoder_max_seq_len=tokens.numel() + self.max_new_tokens + ) if not self.shard.is_last_layer(): self.model.output_hidden_states = [self.shard.end_layer] @@ -282,7 +333,7 @@ def generate(self, tokens: torch.Tensor, hidden_state: Optional[torch.Tensor] = ).unsqueeze(0) input_pos = torch.arange(0, total_response_length, device=generated_tokens.device).unsqueeze(0) - + if self.model.caches_are_enabled(): curr_masks = masks[:, :tokens_length] else: diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 68c9ec8a4..9edd779aa 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -152,27 +152,28 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # saving embed for paired weights if key == "model.embed_tokens.weight": - paried_embed_weight = value + # paried_embed_weight = value # change name for torchtune # print("model.embed_tokens.weight == model.tok_embeddings.weight") remapped_state_dict["model.tok_embeddings.weight"] = value - elif key == "lm_head.weight": - paried_lmhead = False + # elif key == "lm_head.weight": + # paried_lmhead = False # get everything else except layers, embed_tokens and lm_head if len(re.findall(r"model\.layers\..*", key)) == 0 and key != "model.embed_tokens.weight" and key != "lm_head.weight": # print(f"loading other weight: {key}") remapped_state_dict[key] = value - if paried_lmhead: + # if paried_lmhead: # print(f"model.output.weight: {paried_embed_weight}") - remapped_state_dict["model.output.weight"] = paried_embed_weight + # remapped_state_dict["model.output.weight"] = paried_embed_weight # print("\nRemapped state dict\n") # for rsdk in remapped_state_dict.keys(): # print(f"-- {rsdk}") - + del state_dict + del full_state_dict model.load_state_dict(remapped_state_dict, strict=False) # if DEBUG >= 7: diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index dabdae219..f8b931608 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -66,6 +66,7 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" print(f"tokens prompt: {prompt}") print(f"pad_id: {llama_tokenizer.pad_id}") + generated_tokens, _ = ttg.generate( model=model.model, prompt=prompt, @@ -75,6 +76,7 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" top_k=TOP_K, stop_tokens=llama_tokenizer.stop_tokens, ) + generated_tokens = generated_tokens[:, -MAX_GEN_TOKENS:].tolist() print(f"generated_tokens: {generated_tokens}") diff --git a/exo/inference/torch/tests/test_llama3_split.py b/exo/inference/torch/tests/test_llama3_split.py index 0d5f69df2..c7155e305 100644 --- a/exo/inference/torch/tests/test_llama3_split.py +++ b/exo/inference/torch/tests/test_llama3_split.py @@ -23,6 +23,8 @@ MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" TEMP = 0.6 TOP_K = 25 +MAX_NEW_TOKENS=10 + def test_generation_1(shard_model, prompt): """ @@ -52,10 +54,7 @@ def test_generation_1(shard_model, prompt): def test_generation_2(shard_model, in_tokens, hidden_state): print("Generate with the rest of layers") - hidden_states, logits = shard_model.generate( - tokens=in_tokens, - hidden_state=hidden_state - ) + hidden_states, logits = shard_model.generate(tokens=in_tokens, 
hidden_state=hidden_state) if hidden_states is not None: print(f"hidden_states {hidden_states.shape}: {hidden_states}") @@ -90,7 +89,7 @@ def test_generation_2(shard_model, in_tokens, hidden_state): if __name__ == "__main__": print("\nTesting generation:") - prompt = "Say 'Hello'" + prompt = "Hello, just say 'Hello' back nothing else" # Get the path to the model files from the Hugging Face cache cache_dir = Path(snapshot_download(MODEL_NAME)) @@ -100,32 +99,34 @@ def test_generation_2(shard_model, in_tokens, hidden_state): # Setup shard n_layers = int(config["num_layers"]) - s1_end = int(n_layers/2) - shard_1 = Shard( - model_id=MODEL_NAME, - start_layer=0, - end_layer=s1_end, - n_layers=n_layers - ) + s1_end = int(n_layers / 2) + shard_1 = Shard(model_id=MODEL_NAME, start_layer=0, end_layer=s1_end, n_layers=n_layers) - shard_2 = Shard( - model_id=MODEL_NAME, - start_layer=s1_end + 1, - end_layer=n_layers - 1, - n_layers=n_layers - ) + shard_2 = Shard(model_id=MODEL_NAME, start_layer=s1_end + 1, end_layer=n_layers - 1, n_layers=n_layers) # Initialize tokenizer llama_tokenizer_path = f"{cache_dir}/original/tokenizer.model" llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) # Initialize LlamaModel with config and tokenizer - shard_model_1 = ShardedLlamaModel(config, shard_1, llama_tokenizer) + shard_model_1 = ShardedLlamaModel( + config, + shard_1, + llama_tokenizer, + None, + MAX_NEW_TOKENS + ) print(f"\nshard_model_1: {shard_model_1}") load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) shard_1_hs, shard_1_tokens = test_generation_1(shard_model_1, prompt) - shard_model_2 = ShardedLlamaModel(config, shard_2, llama_tokenizer) + shard_model_2 = ShardedLlamaModel( + config, + shard_2, + llama_tokenizer, + None, + MAX_NEW_TOKENS + ) print(f"\nshard_model_2: {shard_model_2}") load_model_weights_torchtune(cache_dir, shard_2, shard_model_2) test_generation_2(shard_model_2, shard_1_tokens, shard_1_hs) From 75817ebd9c36ee63ce59e8892f5dea1273291c1d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 17 Nov 2024 09:07:45 -0900 Subject: [PATCH 488/589] updating readme --- exo/inference/torch/README.md | 48 +++++++++++-------- exo/inference/torch/models/llama3.py | 11 +++-- exo/inference/torch/tests/test_llama3_full.py | 16 +++++-- .../torch/tests/test_llama3_split.py | 6 ++- 4 files changed, 49 insertions(+), 32 deletions(-) diff --git a/exo/inference/torch/README.md b/exo/inference/torch/README.md index 2ac5a7436..9d4e757dc 100644 --- a/exo/inference/torch/README.md +++ b/exo/inference/torch/README.md @@ -20,6 +20,9 @@ Working on removing transformers due to inference and VRAM usage [issues](https: ### 10/27/2024 Still working on llama3 model but wanted to note that a better KVCache needs to be investigated. +#### 11/17/2024 +Llama sharded model now working and next step is inference engine. Still testing on small llama 3.2 1B but will try larger models. 
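For reference, a minimal sketch of how the two sharded halves are chained end to end, condensed from `exo/inference/torch/tests/test_llama3_split.py`. The exact `ShardedLlamaModel` constructor and `generate()` signatures track that test file and may shift as the torch inference engine lands:

```
# sketch only: mirrors test_llama3_split.py and assumes the HF weights for
# MODEL_NAME can be downloaded locally
from pathlib import Path

import torch
import torchtune.generation as ttg
from torchtune.models import llama3
from torchtune.data import Message
from huggingface_hub import snapshot_download

from exo.inference.shard import Shard
from exo.inference.torch.models.llama3 import ShardedLlamaModel
from exo.inference.torch.models.llm_utils import (
  load_model_config,
  load_model_weights_torchtune,
)

MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
MAX_NEW_TOKENS = 10

cache_dir = Path(snapshot_download(MODEL_NAME))
config = load_model_config(cache_dir / "config.json")
llama_tokenizer = llama3.llama3_tokenizer(path=f"{cache_dir}/original/tokenizer.model")

# split the decoder layers roughly in half: for 16 layers, shard_1 = 0..8, shard_2 = 9..15
n_layers = int(config["num_layers"])
s1_end = n_layers // 2
shard_1 = Shard(model_id=MODEL_NAME, start_layer=0, end_layer=s1_end, n_layers=n_layers)
shard_2 = Shard(model_id=MODEL_NAME, start_layer=s1_end + 1, end_layer=n_layers - 1, n_layers=n_layers)

# each shard builds and loads safetensor weights only for its own layer range
shard_model_1 = ShardedLlamaModel(config, shard_1, llama_tokenizer, None, MAX_NEW_TOKENS, use_cache=True)
load_model_weights_torchtune(cache_dir, shard_1, shard_model_1)
shard_model_2 = ShardedLlamaModel(config, shard_2, llama_tokenizer, None, MAX_NEW_TOKENS, use_cache=True)
load_model_weights_torchtune(cache_dir, shard_2, shard_model_2)

messages = [Message(role="user", content="Say 'Hello'"), Message(role="assistant", content="")]
tokens = torch.tensor(llama_tokenizer({"messages": messages}, inference=True)["tokens"], dtype=torch.int)

# shard_1 stops at its last layer and hands back a hidden state instead of logits
hidden_state, _ = shard_model_1.generate(tokens)
# shard_2 resumes from that hidden state and produces logits for sampling
_, logits = shard_model_2.generate(tokens=tokens, hidden_state=hidden_state)
next_tok = ttg.sample(logits=logits[:, -1].clone(), temperature=0.6, top_k=25)
print(llama_tokenizer.decode(next_tok.tolist()[0]))
```

Passing only the hidden state between shards is what lets each node keep just its slice of the weights in memory.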
+ ## Tech Tested on @@ -58,29 +61,32 @@ WIP pytorch llama model ``` # Llama-3.2-1B-Instruct # -LlamaModel( - (embed): Embedding(128256, 2048) - (layers): ModuleList( - (0-15): 16 x LlamaBlock( - (self_attn): SDPAttention( - (q_proj): Linear(in_features=2048, out_features=2048, bias=False) - (k_proj): Linear(in_features=2048, out_features=512, bias=False) - (v_proj): Linear(in_features=2048, out_features=512, bias=False) - (o_proj): Linear(in_features=2048, out_features=2048, bias=False) - (rotary_emb): RotaryEmbedding() - ) - (mlp): MultiLayerPreceptron( - (gate_proj): Linear(in_features=2048, out_features=8192, bias=False) - (up_proj): Linear(in_features=2048, out_features=8192, bias=False) - (down_proj): Linear(in_features=8192, out_features=2048, bias=False) - (act_fn): SiLU() +ShardedLlamaModel( + (model): ShardTransformerDecoder( + (tok_embeddings): Embedding(128256, 2048) + (layers): ModuleList( + (0-15): 16 x TransformerSelfAttentionLayer( + (attn): MultiHeadAttention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, out_features=512, bias=False) + (v_proj): Linear(in_features=2048, out_features=512, bias=False) + (output_proj): Linear(in_features=2048, out_features=2048, bias=False) + (pos_embeddings): Llama3ScaledRoPE() + ) + (mlp): MultiLayerPreceptron( + (gate_proj): Linear(in_features=2048, out_features=8192, bias=False) + (up_proj): Linear(in_features=2048, out_features=8192, bias=False) + (down_proj): Linear(in_features=8192, out_features=2048, bias=False) + (act_fn): SiLU() + ) + (sa_norm): RMSNorm() + (mlp_norm): RMSNorm() + (sa_scale): Identity() + (mlp_scale): Identity() ) - (input_layer_norm): RMSNorm() - (post_attention_norm): RMSNorm() ) + (norm): RMSNorm() ) - (norm): RMSNorm() - (rotary_pos_emb): RotaryEmbedding() - (lm_head): Linear(in_features=2048, out_features=128256, bias=False) ) + ``` diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 9d2b96073..7b0076cbc 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -70,6 +70,7 @@ def setup_caches( else: self.decoder_max_cache_seq_len = self.max_seq_len + lic = 0 for layer in self.layers: if layer is not None: layer.setup_caches( @@ -78,6 +79,9 @@ def setup_caches( encoder_max_seq_len=self.encoder_max_cache_seq_len, decoder_max_seq_len=self.decoder_max_cache_seq_len, ) + + print(f"Setup cache for layer {lic}") + lic+=1 def caches_are_enabled(self) -> bool: """ @@ -266,7 +270,7 @@ def __init__( self.config = config self.dtype = get_torch_dtype(self.config["torch_dtype"]) if "torch_dtype" in self.config else torch.float self.device = device if device is not None else torch.device("cpu") - self.use_cache = self.config.get("use_cache", False) if not use_cache else use_cache + self.use_cache = use_cache if use_cache else self.config.get("use_cache", False) self.max_new_tokens = max_new_tokens @@ -287,10 +291,6 @@ def generate( hidden_state (torch.Tensor, optional) - hidden state from last activated hidden layer, if any max_seq_len (int) - Max sequence length of generation, default 4096 """ - - print(self.shard) - print(self.shard.is_last_layer()) - if tokens.ndim == 1: tokens = tokens.view(1, -1) @@ -299,6 +299,7 @@ def generate( # setup cache if not self.model.caches_are_enabled() and self.use_cache: with self.device: + print("\n\nSETTING UP CACHES\n\n") self.model.setup_caches( bsz, self.dtype, diff --git a/exo/inference/torch/tests/test_llama3_full.py 
b/exo/inference/torch/tests/test_llama3_full.py index f8b931608..3a2afcdb8 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -24,7 +24,7 @@ MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" TEMP = 0.6 TOP_K = 25 -MAX_GEN_TOKENS = 50 +MAX_NEW_TOKENS = 10 def main(model, prompt: str, device: torch.device=torch.device("cpu")): # Tokenize input text @@ -70,14 +70,14 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" generated_tokens, _ = ttg.generate( model=model.model, prompt=prompt, - max_generated_tokens=MAX_GEN_TOKENS, + max_generated_tokens=MAX_NEW_TOKENS, pad_id=llama_tokenizer.pad_id, temperature=TEMP, top_k=TOP_K, stop_tokens=llama_tokenizer.stop_tokens, ) - generated_tokens = generated_tokens[:, -MAX_GEN_TOKENS:].tolist() + generated_tokens = generated_tokens[:, -MAX_NEW_TOKENS:].tolist() print(f"generated_tokens: {generated_tokens}") @@ -113,8 +113,16 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" # Initialize LlamaModel with config and tokenizer # device = torch.device("cuda") device = None - shard_model_1 = ShardedLlamaModel(config, shard_1, llama_tokenizer, device=device) + shard_model_1 = ShardedLlamaModel( + config, + shard_1, + llama_tokenizer, + device, + MAX_NEW_TOKENS, + use_cache=True + ) print(f"\nshard_model_1: {shard_model_1}") + exit() load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) diff --git a/exo/inference/torch/tests/test_llama3_split.py b/exo/inference/torch/tests/test_llama3_split.py index c7155e305..682727654 100644 --- a/exo/inference/torch/tests/test_llama3_split.py +++ b/exo/inference/torch/tests/test_llama3_split.py @@ -114,7 +114,8 @@ def test_generation_2(shard_model, in_tokens, hidden_state): shard_1, llama_tokenizer, None, - MAX_NEW_TOKENS + MAX_NEW_TOKENS, + use_cache=True ) print(f"\nshard_model_1: {shard_model_1}") load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) @@ -125,7 +126,8 @@ def test_generation_2(shard_model, in_tokens, hidden_state): shard_2, llama_tokenizer, None, - MAX_NEW_TOKENS + MAX_NEW_TOKENS, + use_cache=True ) print(f"\nshard_model_2: {shard_model_2}") load_model_weights_torchtune(cache_dir, shard_2, shard_model_2) From 73630d1ef92d64c843c40413e483cf1c8fb5a0ed Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Mon, 18 Nov 2024 11:50:58 -0900 Subject: [PATCH 489/589] building out torch inference engine --- exo/inference/torch/models/llama3.py | 8 +-- exo/inference/torch/pt_inference.py | 62 +++++++++++++++++++ exo/inference/torch/tests/test_llama3_full.py | 1 - 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 7b0076cbc..5356218f2 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -70,7 +70,6 @@ def setup_caches( else: self.decoder_max_cache_seq_len = self.max_seq_len - lic = 0 for layer in self.layers: if layer is not None: layer.setup_caches( @@ -79,9 +78,6 @@ def setup_caches( encoder_max_seq_len=self.encoder_max_cache_seq_len, decoder_max_seq_len=self.decoder_max_cache_seq_len, ) - - print(f"Setup cache for layer {lic}") - lic+=1 def caches_are_enabled(self) -> bool: """ @@ -287,9 +283,8 @@ def generate( Generate logits and/or hidden_states from llama model Args - tokens (torch.Tensor) - tokens from prompt tokenization + tokens (torch.Tensor) - tokens from prompt tokenization and generation hidden_state (torch.Tensor, optional) - hidden 
state from last activated hidden layer, if any - max_seq_len (int) - Max sequence length of generation, default 4096 """ if tokens.ndim == 1: tokens = tokens.view(1, -1) @@ -299,7 +294,6 @@ def generate( # setup cache if not self.model.caches_are_enabled() and self.use_cache: with self.device: - print("\n\nSETTING UP CACHES\n\n") self.model.setup_caches( bsz, self.dtype, diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 7b8e7bba3..e88ec0602 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -2,4 +2,66 @@ TorchDynamicShardInferenceEngine Sharded inference engine using PyTorch based torchtune models """ +import os +import asyncio +import torch + +from torchtune.models import llama3 + +from exo.inference.inference_engine import InferenceEngine +from exo.download.hf.hf_shard_download import HFShardDownloader +from exo.inference.shard import Shard +from exo.inference.torch.models.llm_utils import ( + load_model_config, + load_model_weights_torchtune, +) + +# supported models +from exo.inference.torch.models.llama3 import ShardedLlamaModel + +TEMP = 0.6 +TOP_K = 25 + +class TorchDynamicShardInferenceEngine(InferenceEngine): + def __init__(self, shard_downloader: HFShardDownloader, model_id: str="llama"): + self.shard = None + self.shard_downloader = shard_downloader + self.model_id = model_id + self.supported_models = ["llama"] + + # device settings + if os.environ.get("TORCH_DEVICE"): + self.device = torch.device(os.environ["TORCH_DEVICE"]) + elif torch.cuda.is_available(): + self.device = torch.device("cuda") + elif torch.backends.mps.is_available() and torch.backends.mps.is_built(): + self.device = torch.device("mps") + else: + self.device = torch.device("cpu") + + async def ensure_shard(self, shard: Shard): + if self.shard == shard: + return + + # download model safetensors and shard + model_path = await self.shard_downloader.ensure_shard(shard) + model_config = load_model_config(model_path / "config.json") + + self.tokenizer = llama3.llama3_tokenizer( + path=f"{model_path}/original/tokenizer.model" + ) + + if self.model_id not in self.supported_models: + raise ValueError( + f"Model {self.model_id} not supported, only supported models are\n{self.supported_models}" + ) + + self.sharded_model = ShardedLlamaModel( + model_config, + shard, + self.tokenizer, + self.device, + None, + use_cache=True + ) diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index 3a2afcdb8..7ffb4dce4 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -122,7 +122,6 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" use_cache=True ) print(f"\nshard_model_1: {shard_model_1}") - exit() load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) From ad993324f615a22941f0d764a0a87c8af54e53eb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 23 Nov 2024 10:44:05 -0900 Subject: [PATCH 490/589] creating torch inference engine, separated torch and hf torch engines, adding hf engine tests --- exo/inference/torch/hf_inference.py | 10 ++- exo/inference/torch/models/llama3.py | 61 +++++++++++++---- exo/inference/torch/pt_inference.py | 67 ++++++++++++++++++- ..._engine.py => test_hf_inference_engine.py} | 0 .../torch/tests/test_pt_inference_engine.py | 53 +++++++++++++++ 5 files changed, 169 insertions(+), 22 deletions(-) rename exo/inference/torch/tests/{test_inference_engine.py => 
test_hf_inference_engine.py} (100%) create mode 100644 exo/inference/torch/tests/test_pt_inference_engine.py diff --git a/exo/inference/torch/hf_inference.py b/exo/inference/torch/hf_inference.py index 1b4f19e00..4912a0a2e 100644 --- a/exo/inference/torch/hf_inference.py +++ b/exo/inference/torch/hf_inference.py @@ -1,4 +1,7 @@ -# experimental, based off of tinygrad/inference.py +""" +HFDynamicShardInferenceEngine +Sharded inference engine using PyTorch based HuggingFace transformers +""" import asyncio import os import json @@ -26,11 +29,6 @@ TOP_P = 0.9 class HFDynamicShardInferenceEngine(InferenceEngine): - """ - HuggingFace Dynamic Shard Inference Engine - Performing model inference with sharded Pytorch based HuggingFace models. - """ - def __init__(self, shard_downloader: HFShardDownloader): """ Initialize the inference engine. diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 5356218f2..feef0baa8 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -78,7 +78,7 @@ def setup_caches( encoder_max_seq_len=self.encoder_max_cache_seq_len, decoder_max_seq_len=self.decoder_max_cache_seq_len, ) - + def caches_are_enabled(self) -> bool: """ modified version for shard @@ -89,7 +89,7 @@ def caches_are_enabled(self) -> bool: for layer in self.layers: if layer is not None: return layer.caches_are_enabled() - + def forward( self, tokens: torch.Tensor, @@ -159,6 +159,7 @@ def LlamaModel(config: dict, shard: Shard): LlamaModel using torchtune """ # rope scaling config + scale_factor = 32 if config["rope_scaling"] is not None: scale_factor = config["rope_scaling"].get("factor", 32) @@ -214,8 +215,8 @@ def LlamaModel(config: dict, shard: Shard): layers[i] = layer - for i in range(len(layers)): - print(f"layers[{i}]: {layers[i]}") + #for i in range(len(layers)): + # print(f"layers[{i}]: {layers[i]}") layers = nn.ModuleList(layers) tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) output_proj = ttm.TiedLinear(tok_embeddings) @@ -251,12 +252,12 @@ def LlamaModel(config: dict, shard: Shard): class ShardedLlamaModel(nn.Module): def __init__( - self, - config: dict, - shard: Shard, - tokenizer: Any, + self, + config: dict, + shard: Shard, + tokenizer: Any, device: Optional[torch.device] = None, - max_new_tokens: Optional[int] = 10, + max_new_tokens: int = 2048, use_cache: Optional[bool] = False ): super(ShardedLlamaModel, self).__init__() @@ -266,19 +267,23 @@ def __init__( self.config = config self.dtype = get_torch_dtype(self.config["torch_dtype"]) if "torch_dtype" in self.config else torch.float self.device = device if device is not None else torch.device("cpu") - self.use_cache = use_cache if use_cache else self.config.get("use_cache", False) - - self.max_new_tokens = max_new_tokens self.max_seq_len = self.config["max_seq_len"] + if use_cache: + self.use_cache = use_cache + else: + self.config.get("use_cache", False) + self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) + print(f"model loaded: {self.model}\n") + def generate( self, tokens: torch.Tensor, hidden_state: Optional[torch.Tensor] = None - ) -> Tuple[Optional[List[torch.Tensor]], Optional[torch.Tensor]]: + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], bool]: """ Generate logits and/or hidden_states from llama model @@ -292,6 +297,7 @@ def generate( bsz, tokens_length = tokens.size() # setup cache + print(self.model) if not self.model.caches_are_enabled() and self.use_cache: with self.device: 
self.model.setup_caches( @@ -351,6 +357,26 @@ def generate( print(f"\nmodel_output: {model_output}") + # stop token + stop_tokens = None + + stop_token_reached = torch.zeros( + bsz, + dtype=torch.bool, + device=tokens.device + ) + stop_tokens = ( + torch.tensor( + stop_tokens, + device=tokens.device, + dtype=tokens.dtype + ) + if stop_tokens + else None + ) + + finished = False + if isinstance(model_output, list): model_logits = model_output[1] model_output.pop() # remove logits @@ -359,4 +385,11 @@ def generate( model_logits = model_output model_hs = None - return model_hs, model_logits + if stop_tokens is not None: + stop_token_reached = ttg._generation.update_stop_tokens_tracker( + tokens, stop_tokens, stop_token_reached + ) + + finished = True if stop_token_reached.all() else False + + return model_hs, model_logits, finished diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index e88ec0602..b5a1c8fd6 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -3,7 +3,11 @@ Sharded inference engine using PyTorch based torchtune models """ import os +from typing import Optional, Tuple, Union, List +import functools +from concurrent.futures import ThreadPoolExecutor +import numpy as np import asyncio import torch @@ -12,6 +16,7 @@ from exo.inference.inference_engine import InferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.shard import Shard +from exo.helpers import DEBUG from exo.inference.torch.models.llm_utils import ( load_model_config, load_model_weights_torchtune, @@ -40,6 +45,57 @@ def __init__(self, shard_downloader: HFShardDownloader, model_id: str="llama"): else: self.device = torch.device("cpu") + async def infer_prompt( + self, + request_id: str, + shard: Shard, + prompt: str, + image_str: Optional[str] = None, + inference_state: Optional[str] = None + ) -> Tuple[np.ndarray, str, bool]: + if DEBUG >= 4: + print("infer_prompt called") + print(f"prompt: {prompt}") + print(f"shard: {shard}") + print(f"inference_state: {inference_state}") + # ensure shard + await self.ensure_shard(shard) + + # tokenize + tokens = torch.tensor( + self.tokenizer.encode(prompt, add_bos=True, add_eos=True), + dtype=torch.int + ) + hidden_states = None + + # generate + loop = asyncio.get_running_loop() + with ThreadPoolExecutor() as pool: + hidden_states, logits, finished = await loop.run_in_executor( + pool, + functools.partial( + self.sharded_model.generate, + tokens=tokens + ) + ) + + if hidden_states is not None: + return hidden_states.numpy(force=True), "", finished + else: + return logits.numpy(force=True), "", finished + + async def infer_tensor( + self, + request_id: str, + shard: Shard, + input_data: np.ndarray, + inference_state: Optional[str] = None + ) -> Tuple[np.ndarray, str, bool]: + # ensure shard + await self.ensure_shard(shard) + + return np.empty((1,1)), "", False + async def ensure_shard(self, shard: Shard): if self.shard == shard: return @@ -58,10 +114,17 @@ async def ensure_shard(self, shard: Shard): ) self.sharded_model = ShardedLlamaModel( - model_config, - shard, + model_config, + shard, self.tokenizer, self.device, None, use_cache=True ) + + # load sharded weights + load_model_weights_torchtune( + model_path, + shard, + self.sharded_model + ) diff --git a/exo/inference/torch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_hf_inference_engine.py similarity index 100% rename from exo/inference/torch/tests/test_inference_engine.py rename to 
exo/inference/torch/tests/test_hf_inference_engine.py diff --git a/exo/inference/torch/tests/test_pt_inference_engine.py b/exo/inference/torch/tests/test_pt_inference_engine.py new file mode 100644 index 000000000..e430989ad --- /dev/null +++ b/exo/inference/torch/tests/test_pt_inference_engine.py @@ -0,0 +1,53 @@ +""" +Test inference engine and model sharding +""" +import time +import asyncio + +from exo.inference.shard import Shard +from exo.inference.torch.pt_inference import TorchDynamicShardInferenceEngine +from exo.download.hf.hf_shard_download import HFShardDownloader +from exo.inference.inference_engine import InferenceEngine + +import numpy as np + +async def test_inference_engine( + inference_engine_1: InferenceEngine, + inference_engine_2: InferenceEngine, + model_id: str, + n_layers: int): + + prompt = "In a single word only, what is the last name of the current president of the USA?" + + shard = Shard( + model_id=model_id, + start_layer=0, + end_layer=0, + n_layers=n_layers + ) + + resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( + "A", + shard=shard, + prompt=prompt + ) + + print("\n------------resp_full---------------\n") + print(resp_full) + print("\n------------resp_full---------------\n") + + time.sleep(5) + +if __name__ == '__main__': + try: + print("\n\n -------- TEST meta-llama/Llama-3.2-1B-Instruct -------- \n\n") + asyncio.run(test_inference_engine( + TorchDynamicShardInferenceEngine(HFShardDownloader()), + TorchDynamicShardInferenceEngine(HFShardDownloader()), + "meta-llama/Llama-3.2-1B-Instruct", + 16 + )) + except Exception as err: + print(f"\n!!!! LLAMA TEST FAILED \n{err}\n") + + From 6ab6f1c504ce46fec812ff283078ac7fc971e78d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 23 Nov 2024 11:08:59 -0900 Subject: [PATCH 491/589] merge --- build/lib/exo/__init__.py | 1 + build/lib/exo/api/__init__.py | 1 + build/lib/exo/api/chatgpt_api.py | 358 +++++++++++ build/lib/exo/download/__init__.py | 0 build/lib/exo/download/download_progress.py | 61 ++ build/lib/exo/download/hf/__init__.py | 0 build/lib/exo/download/hf/hf_helpers.py | 403 ++++++++++++ .../lib/exo/download/hf/hf_shard_download.py | 77 +++ build/lib/exo/download/shard_download.py | 26 + build/lib/exo/helpers.py | 234 +++++++ build/lib/exo/inference/__init__.py | 0 .../exo/inference/debug_inference_engine.py | 59 ++ build/lib/exo/inference/inference_engine.py | 34 + build/lib/exo/inference/mlx/__init__.py | 0 .../lib/exo/inference/mlx/models/__init__.py | 0 build/lib/exo/inference/mlx/models/base.py | 9 + .../exo/inference/mlx/models/deepseek_v2.py | 127 ++++ build/lib/exo/inference/mlx/models/llama.py | 125 ++++ build/lib/exo/inference/mlx/models/llava.py | 585 ++++++++++++++++++ .../inference/mlx/sharded_inference_engine.py | 40 ++ build/lib/exo/inference/mlx/sharded_model.py | 86 +++ build/lib/exo/inference/mlx/sharded_utils.py | 207 +++++++ .../exo/inference/mlx/test_sharded_llama.py | 40 ++ .../exo/inference/mlx/test_sharded_llava.py | 64 ++ .../exo/inference/mlx/test_sharded_model.py | 52 ++ build/lib/exo/inference/pytorch/__init__.py | 0 build/lib/exo/inference/pytorch/helpers.py | 24 + build/lib/exo/inference/pytorch/inference.py | 211 +++++++ .../exo/inference/pytorch/model/__init__.py | 0 build/lib/exo/inference/pytorch/model/hf.py | 155 +++++ .../lib/exo/inference/pytorch/model/utils.py | 83 +++ .../pytorch/test_inference_engine.py | 141 +++++ build/lib/exo/inference/shard.py | 39 ++ .../exo/inference/test_inference_engine.py | 64 ++ 
build/lib/exo/inference/tokenizers.py | 45 ++ build/lib/exo/models.py | 44 ++ build/lib/exo/networking/__init__.py | 5 + build/lib/exo/networking/discovery.py | 17 + build/lib/exo/networking/grpc/__init__.py | 0 .../lib/exo/networking/grpc/grpc_discovery.py | 188 ++++++ .../exo/networking/grpc/grpc_peer_handle.py | 109 ++++ build/lib/exo/networking/grpc/grpc_server.py | 118 ++++ .../exo/networking/grpc/node_service_pb2.py | 61 ++ .../networking/grpc/node_service_pb2_grpc.py | 272 ++++++++ .../networking/grpc/test_grpc_discovery.py | 22 + build/lib/exo/networking/peer_handle.py | 48 ++ build/lib/exo/networking/server.py | 11 + build/lib/exo/orchestration/__init__.py | 4 + build/lib/exo/orchestration/node.py | 47 ++ build/lib/exo/orchestration/standard_node.py | 385 ++++++++++++ build/lib/exo/orchestration/test_node.py | 57 ++ build/lib/exo/stats/__init__.py | 0 build/lib/exo/stats/metrics.py | 29 + build/lib/exo/test_callbacks.py | 50 ++ build/lib/exo/topology/__init__.py | 0 build/lib/exo/topology/device_capabilities.py | 207 +++++++ .../lib/exo/topology/partitioning_strategy.py | 40 ++ ...g_memory_weighted_partitioning_strategy.py | 18 + .../exo/topology/test_device_capabilities.py | 91 +++ build/lib/exo/topology/test_map_partitions.py | 81 +++ ...g_memory_weighted_partitioning_strategy.py | 90 +++ build/lib/exo/topology/topology.py | 49 ++ build/lib/exo/viz/__init__.py | 0 build/lib/exo/viz/test_topology_viz.py | 129 ++++ build/lib/exo/viz/topology_viz.py | 307 +++++++++ 65 files changed, 5830 insertions(+) create mode 100644 build/lib/exo/__init__.py create mode 100644 build/lib/exo/api/__init__.py create mode 100644 build/lib/exo/api/chatgpt_api.py create mode 100644 build/lib/exo/download/__init__.py create mode 100644 build/lib/exo/download/download_progress.py create mode 100644 build/lib/exo/download/hf/__init__.py create mode 100644 build/lib/exo/download/hf/hf_helpers.py create mode 100644 build/lib/exo/download/hf/hf_shard_download.py create mode 100644 build/lib/exo/download/shard_download.py create mode 100644 build/lib/exo/helpers.py create mode 100644 build/lib/exo/inference/__init__.py create mode 100644 build/lib/exo/inference/debug_inference_engine.py create mode 100644 build/lib/exo/inference/inference_engine.py create mode 100644 build/lib/exo/inference/mlx/__init__.py create mode 100644 build/lib/exo/inference/mlx/models/__init__.py create mode 100644 build/lib/exo/inference/mlx/models/base.py create mode 100644 build/lib/exo/inference/mlx/models/deepseek_v2.py create mode 100644 build/lib/exo/inference/mlx/models/llama.py create mode 100644 build/lib/exo/inference/mlx/models/llava.py create mode 100644 build/lib/exo/inference/mlx/sharded_inference_engine.py create mode 100644 build/lib/exo/inference/mlx/sharded_model.py create mode 100644 build/lib/exo/inference/mlx/sharded_utils.py create mode 100644 build/lib/exo/inference/mlx/test_sharded_llama.py create mode 100644 build/lib/exo/inference/mlx/test_sharded_llava.py create mode 100644 build/lib/exo/inference/mlx/test_sharded_model.py create mode 100644 build/lib/exo/inference/pytorch/__init__.py create mode 100644 build/lib/exo/inference/pytorch/helpers.py create mode 100644 build/lib/exo/inference/pytorch/inference.py create mode 100644 build/lib/exo/inference/pytorch/model/__init__.py create mode 100644 build/lib/exo/inference/pytorch/model/hf.py create mode 100644 build/lib/exo/inference/pytorch/model/utils.py create mode 100644 build/lib/exo/inference/pytorch/test_inference_engine.py create mode 100644 
build/lib/exo/inference/shard.py create mode 100644 build/lib/exo/inference/test_inference_engine.py create mode 100644 build/lib/exo/inference/tokenizers.py create mode 100644 build/lib/exo/models.py create mode 100644 build/lib/exo/networking/__init__.py create mode 100644 build/lib/exo/networking/discovery.py create mode 100644 build/lib/exo/networking/grpc/__init__.py create mode 100644 build/lib/exo/networking/grpc/grpc_discovery.py create mode 100644 build/lib/exo/networking/grpc/grpc_peer_handle.py create mode 100644 build/lib/exo/networking/grpc/grpc_server.py create mode 100644 build/lib/exo/networking/grpc/node_service_pb2.py create mode 100644 build/lib/exo/networking/grpc/node_service_pb2_grpc.py create mode 100644 build/lib/exo/networking/grpc/test_grpc_discovery.py create mode 100644 build/lib/exo/networking/peer_handle.py create mode 100644 build/lib/exo/networking/server.py create mode 100644 build/lib/exo/orchestration/__init__.py create mode 100644 build/lib/exo/orchestration/node.py create mode 100644 build/lib/exo/orchestration/standard_node.py create mode 100644 build/lib/exo/orchestration/test_node.py create mode 100644 build/lib/exo/stats/__init__.py create mode 100644 build/lib/exo/stats/metrics.py create mode 100644 build/lib/exo/test_callbacks.py create mode 100644 build/lib/exo/topology/__init__.py create mode 100644 build/lib/exo/topology/device_capabilities.py create mode 100644 build/lib/exo/topology/partitioning_strategy.py create mode 100644 build/lib/exo/topology/ring_memory_weighted_partitioning_strategy.py create mode 100644 build/lib/exo/topology/test_device_capabilities.py create mode 100644 build/lib/exo/topology/test_map_partitions.py create mode 100644 build/lib/exo/topology/test_ring_memory_weighted_partitioning_strategy.py create mode 100644 build/lib/exo/topology/topology.py create mode 100644 build/lib/exo/viz/__init__.py create mode 100644 build/lib/exo/viz/test_topology_viz.py create mode 100644 build/lib/exo/viz/topology_viz.py diff --git a/build/lib/exo/__init__.py b/build/lib/exo/__init__.py new file mode 100644 index 000000000..e802d331b --- /dev/null +++ b/build/lib/exo/__init__.py @@ -0,0 +1 @@ +from exo.helpers import DEBUG as DEBUG, DEBUG_DISCOVERY as DEBUG_DISCOVERY, VERSION as VERSION diff --git a/build/lib/exo/api/__init__.py b/build/lib/exo/api/__init__.py new file mode 100644 index 000000000..660e75078 --- /dev/null +++ b/build/lib/exo/api/__init__.py @@ -0,0 +1 @@ +from exo.api.chatgpt_api import ChatGPTAPI as ChatGPTAPI diff --git a/build/lib/exo/api/chatgpt_api.py b/build/lib/exo/api/chatgpt_api.py new file mode 100644 index 000000000..1abda85fe --- /dev/null +++ b/build/lib/exo/api/chatgpt_api.py @@ -0,0 +1,358 @@ +import uuid +import time +import asyncio +import json +from pathlib import Path +from transformers import AutoTokenizer +from typing import List, Literal, Union, Dict +from aiohttp import web +import aiohttp_cors +import traceback +from exo import DEBUG, VERSION +from exo.helpers import PrefixDict +from exo.inference.shard import Shard +from exo.inference.tokenizers import resolve_tokenizer +from exo.orchestration import Node +from exo.models import model_base_shards +from typing import Callable + +class Message: + def __init__(self, role: str, content: Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]): + self.role = role + self.content = content + + def to_dict(self): + return {"role": self.role, "content": self.content} + + +class ChatCompletionRequest: + def __init__(self, model: str, messages: 
List[Message], temperature: float): + self.model = model + self.messages = messages + self.temperature = temperature + + def to_dict(self): + return {"model": self.model, "messages": [message.to_dict() for message in self.messages], "temperature": self.temperature} + + +def generate_completion( + chat_request: ChatCompletionRequest, + tokenizer, + prompt: str, + request_id: str, + tokens: List[int], + stream: bool, + finish_reason: Union[Literal["length", "stop"], None], + object_type: Literal["chat.completion", "text_completion"], +) -> dict: + completion = { + "id": f"chatcmpl-{request_id}", + "object": object_type, + "created": int(time.time()), + "model": chat_request.model, + "system_fingerprint": f"exo_{VERSION}", + "choices": [{ + "index": 0, + "message": {"role": "assistant", "content": tokenizer.decode(tokens)}, + "logprobs": None, + "finish_reason": finish_reason, + }], + } + + if not stream: + completion["usage"] = { + "prompt_tokens": len(tokenizer.encode(prompt)), + "completion_tokens": len(tokens), + "total_tokens": len(tokenizer.encode(prompt)) + len(tokens), + } + + choice = completion["choices"][0] + if object_type.startswith("chat.completion"): + key_name = "delta" if stream else "message" + choice[key_name] = {"role": "assistant", "content": tokenizer.decode(tokens)} + elif object_type == "text_completion": + choice["text"] = tokenizer.decode(tokens) + else: + ValueError(f"Unsupported response type: {object_type}") + + return completion + + +def remap_messages(messages: List[Message]) -> List[Message]: + remapped_messages = [] + last_image = None + for message in messages: + if not isinstance(message.content, list): + remapped_messages.append(message) + continue + + remapped_content = [] + for content in message.content: + if isinstance(content, dict): + if content.get("type") in ["image_url", "image"]: + image_url = content.get("image_url", {}).get("url") or content.get("image") + if image_url: + last_image = {"type": "image", "image": image_url} + remapped_content.append({"type": "text", "text": "[An image was uploaded but is not displayed here]"}) + else: + remapped_content.append(content) + else: + remapped_content.append(content) + remapped_messages.append(Message(role=message.role, content=remapped_content)) + + if last_image: + # Replace the last image placeholder with the actual image content + for message in reversed(remapped_messages): + for i, content in enumerate(message.content): + if isinstance(content, dict): + if content.get("type") == "text" and content.get("text") == "[An image was uploaded but is not displayed here]": + message.content[i] = last_image + return remapped_messages + + return remapped_messages + + +def build_prompt(tokenizer, _messages: List[Message]): + if len(_messages) == 1: + user_msg = _messages[0] + + # get instruct sys message + sys_msg = Message(role="system", content="You are a helpful assistant.") + + # restructure for sys_msg to go first + _messages = [sys_msg, user_msg] + + messages = remap_messages(_messages) + prompt = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + + if DEBUG >= 3: + print(f"prompt: {str(prompt)}") + for msg in messages: + print(f"chat role: {msg.role}\ncontent: {msg.content}") + + image_str = None + for message in messages: + if not isinstance(message.content, list): + continue + + for content in message.content: + # note: we only support one image at a time right now. Multiple is possible. 
See: https://github.com/huggingface/transformers/blob/e68ec18ce224af879f22d904c7505a765fb77de3/docs/source/en/model_doc/llava.md?plain=1#L41 + # follows the convention in https://platform.openai.com/docs/guides/vision + if isinstance(content, dict) and content.get("type", None) == "image": + image_str = content.get("image", None) + break + + return prompt, image_str + + +def parse_message(data: dict): + if "role" not in data or "content" not in data: + raise ValueError(f"Invalid message: {data}. Must have 'role' and 'content'") + return Message(data["role"], data["content"]) + + +def parse_chat_request(data: dict): + return ChatCompletionRequest( + data.get("model", "llama-3.1-8b"), + [parse_message(msg) for msg in data["messages"]], + data.get("temperature", 0.0), + ) + + +class PromptSession: + def __init__(self, request_id: str, timestamp: int, prompt: str): + self.request_id = request_id + self.timestamp = timestamp + self.prompt = prompt + + +class ChatGPTAPI: + def __init__(self, node: Node, inference_engine_classname: str, response_timeout_secs: int = 90, on_chat_completion_request: Callable[[str, ChatCompletionRequest, str], None] = None): + self.node = node + self.inference_engine_classname = inference_engine_classname + self.response_timeout_secs = response_timeout_secs + self.on_chat_completion_request = on_chat_completion_request + self.app = web.Application(client_max_size=100*1024*1024) # 100MB to support image upload + self.prompts: PrefixDict[str, PromptSession] = PrefixDict() + self.prev_token_lens: Dict[str, int] = {} + self.stream_tasks: Dict[str, asyncio.Task] = {} + cors = aiohttp_cors.setup(self.app) + cors_options = aiohttp_cors.ResourceOptions( + allow_credentials=True, + expose_headers="*", + allow_headers="*", + allow_methods="*", + ) + cors.add(self.app.router.add_get("/models", self.handle_get_models), {"*": cors_options}) + cors.add(self.app.router.add_get("/v1/models", self.handle_get_models), {"*": cors_options}) + cors.add(self.app.router.add_post("/chat/token/encode", self.handle_post_chat_token_encode), {"*": cors_options}) + cors.add(self.app.router.add_post("/v1/chat/token/encode", self.handle_post_chat_token_encode), {"*": cors_options}) + cors.add(self.app.router.add_post("/chat/completions", self.handle_post_chat_completions), {"*": cors_options}) + cors.add(self.app.router.add_post("/v1/chat/completions", self.handle_post_chat_completions), {"*": cors_options}) + + self.static_dir = Path(__file__).parent.parent.parent/"tinychat/examples/tinychat" + self.app.router.add_get("/", self.handle_root) + self.app.router.add_static("/", self.static_dir, name="static") + + # Add middleware to log every request + self.app.middlewares.append(self.log_request) + + async def log_request(self, app, handler): + async def middleware(request): + if DEBUG >= 2: print(f"Received request: {request.method} {request.path}") + return await handler(request) + + return middleware + + async def handle_root(self, request): + return web.FileResponse(self.static_dir/"index.html") + + async def handle_get_models(self, request): + return web.json_response([{"id": model_name, "object": "model", "owned_by": "exo", "ready": True } for model_name, _ in model_base_shards.items()]) + + async def handle_post_chat_token_encode(self, request): + data = await request.json() + shard = model_base_shards.get(data.get("model", "llama-3.1-8b"), {}).get(self.inference_engine_classname) + messages = [parse_message(msg) for msg in data.get("messages", [])] + tokenizer = await 
resolve_tokenizer(shard.model_id) + return web.json_response({"length": len(build_prompt(tokenizer, messages)[0])}) + + async def handle_post_chat_completions(self, request): + data = await request.json() + if DEBUG >= 2: print(f"Handling chat completions request from {request.remote}: {data}") + stream = data.get("stream", False) + chat_request = parse_chat_request(data) + if chat_request.model and chat_request.model.startswith("gpt-"): # to be compatible with ChatGPT tools, point all gpt- model requests to llama instead + chat_request.model = "llama-3.1-8b" + if not chat_request.model or chat_request.model not in model_base_shards: + if DEBUG >= 1: print(f"Invalid model: {chat_request.model}. Supported: {list(model_base_shards.keys())}. Defaulting to llama-3.1-8b") + chat_request.model = "llama-3.1-8b" + shard = model_base_shards[chat_request.model].get(self.inference_engine_classname, None) + if not shard: + supported_models = [model for model, engines in model_base_shards.items() if self.inference_engine_classname in engines] + return web.json_response( + {"detail": f"Unsupported model: {chat_request.model} with inference engine {self.inference_engine_classname}. Supported models for this engine: {supported_models}"}, + status=400, + ) + + tokenizer = await resolve_tokenizer(shard.model_id) + if DEBUG >= 4: print(f"Resolved tokenizer: {tokenizer}") + + prompt, image_str = build_prompt(tokenizer, chat_request.messages) + request_id = str(uuid.uuid4()) + if self.on_chat_completion_request: + try: + self.on_chat_completion_request(request_id, chat_request, prompt) + except Exception as e: + if DEBUG >= 2: traceback.print_exc() + # request_id = None + # match = self.prompts.find_longest_prefix(prompt) + # if match and len(prompt) > len(match[1].prompt): + # if DEBUG >= 2: + # print(f"Prompt for request starts with previous prompt {len(match[1].prompt)} of {len(prompt)}: {match[1].prompt}") + # request_id = match[1].request_id + # self.prompts.add(prompt, PromptSession(request_id=request_id, timestamp=int(time.time()), prompt=prompt)) + # # remove the matching prefix from the prompt + # prompt = prompt[len(match[1].prompt):] + # else: + # request_id = str(uuid.uuid4()) + # self.prompts.add(prompt, PromptSession(request_id=request_id, timestamp=int(time.time()), prompt=prompt)) + + callback_id = f"chatgpt-api-wait-response-{request_id}" + callback = self.node.on_token.register(callback_id) + + if DEBUG >= 2: print(f"Sending prompt from ChatGPT api {request_id=} {shard=} {prompt=} {image_str=}") + try: + await self.node.process_prompt(shard, prompt, image_str, request_id=request_id) + except Exception as e: + if DEBUG >= 2: traceback.print_exc() + return web.json_response({"detail": f"Error processing prompt (see logs with DEBUG>=2): {str(e)}"}, status=500) + + try: + if DEBUG >= 2: print(f"Waiting for response to finish. 
timeout={self.response_timeout_secs}s") + + if stream: + response = web.StreamResponse( + status=200, + reason="OK", + headers={ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + }, + ) + await response.prepare(request) + + async def stream_result(request_id: str, tokens: List[int], is_finished: bool): + prev_last_tokens_len = self.prev_token_lens.get(request_id, 0) + self.prev_token_lens[request_id] = max(prev_last_tokens_len, len(tokens)) + new_tokens = tokens[prev_last_tokens_len:] + finish_reason = None + eos_token_id = tokenizer.special_tokens_map.get("eos_token_id") if hasattr(tokenizer, "_tokenizer") and isinstance(tokenizer._tokenizer, + AutoTokenizer) else getattr(tokenizer, "eos_token_id", None) + if len(new_tokens) > 0 and new_tokens[-1] == eos_token_id: + new_tokens = new_tokens[:-1] + if is_finished: + finish_reason = "stop" + if is_finished and not finish_reason: + finish_reason = "length" + + completion = generate_completion( + chat_request, + tokenizer, + prompt, + request_id, + new_tokens, + stream, + finish_reason, + "chat.completion", + ) + if DEBUG >= 2: print(f"Streaming completion: {completion}") + try: + await response.write(f"data: {json.dumps(completion)}\n\n".encode()) + except Exception as e: + if DEBUG >= 2: print(f"Error streaming completion: {e}") + if DEBUG >= 2: traceback.print_exc() + + def on_result(_request_id: str, tokens: List[int], is_finished: bool): + self.stream_tasks[request_id] = asyncio.create_task(stream_result(request_id, tokens, is_finished)) + + return _request_id == request_id and is_finished + + _, tokens, _ = await callback.wait(on_result, timeout=self.response_timeout_secs) + if request_id in self.stream_tasks: # in case there is still a stream task running, wait for it to complete + if DEBUG >= 2: print("Pending stream task. Waiting for stream task to complete.") + try: + await asyncio.wait_for(self.stream_tasks[request_id], timeout=30) + except asyncio.TimeoutError: + print("WARNING: Stream task timed out. 
This should not happen.") + await response.write_eof() + return response + else: + _, tokens, _ = await callback.wait( + lambda _request_id, tokens, is_finished: _request_id == request_id and is_finished, + timeout=self.response_timeout_secs, + ) + + finish_reason = "length" + eos_token_id = tokenizer.special_tokens_map.get("eos_token_id") if isinstance(getattr(tokenizer, "_tokenizer", None), AutoTokenizer) else tokenizer.eos_token_id + if DEBUG >= 2: print(f"Checking if end of tokens result {tokens[-1]=} is {eos_token_id=}") + if tokens[-1] == eos_token_id: + tokens = tokens[:-1] + finish_reason = "stop" + + return web.json_response(generate_completion(chat_request, tokenizer, prompt, request_id, tokens, stream, finish_reason, "chat.completion")) + except asyncio.TimeoutError: + return web.json_response({"detail": "Response generation timed out"}, status=408) + finally: + deregistered_callback = self.node.on_token.deregister(callback_id) + if DEBUG >= 2: print(f"Deregister {callback_id=} {deregistered_callback=}") + + async def run(self, host: str = "0.0.0.0", port: int = 8000): + runner = web.AppRunner(self.app) + await runner.setup() + site = web.TCPSite(runner, host, port) + await site.start() diff --git a/build/lib/exo/download/__init__.py b/build/lib/exo/download/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/build/lib/exo/download/download_progress.py b/build/lib/exo/download/download_progress.py new file mode 100644 index 000000000..779e53287 --- /dev/null +++ b/build/lib/exo/download/download_progress.py @@ -0,0 +1,61 @@ +from typing import Dict, Callable, Coroutine, Any, Literal +from dataclasses import dataclass +from datetime import timedelta + + +@dataclass +class RepoFileProgressEvent: + repo_id: str + repo_revision: str + file_path: str + downloaded: int + downloaded_this_session: int + total: int + speed: int + eta: timedelta + status: Literal["not_started", "in_progress", "complete"] + + def to_dict(self): + return { + "repo_id": self.repo_id, "repo_revision": self.repo_revision, "file_path": self.file_path, "downloaded": self.downloaded, "downloaded_this_session": self.downloaded_this_session, + "total": self.total, "speed": self.speed, "eta": self.eta.total_seconds(), "status": self.status + } + + @classmethod + def from_dict(cls, data): + if 'eta' in data: data['eta'] = timedelta(seconds=data['eta']) + return cls(**data) + + +@dataclass +class RepoProgressEvent: + repo_id: str + repo_revision: str + completed_files: int + total_files: int + downloaded_bytes: int + downloaded_bytes_this_session: int + total_bytes: int + overall_speed: int + overall_eta: timedelta + file_progress: Dict[str, RepoFileProgressEvent] + status: Literal["not_started", "in_progress", "complete"] + + def to_dict(self): + return { + "repo_id": self.repo_id, "repo_revision": self.repo_revision, "completed_files": self.completed_files, "total_files": self.total_files, "downloaded_bytes": self.downloaded_bytes, + "downloaded_bytes_this_session": self.downloaded_bytes_this_session, "total_bytes": self.total_bytes, "overall_speed": self.overall_speed, "overall_eta": self.overall_eta.total_seconds(), + "file_progress": {k: v.to_dict() + for k, v in self.file_progress.items()}, "status": self.status + } + + @classmethod + def from_dict(cls, data): + if 'overall_eta' in data: data['overall_eta'] = timedelta(seconds=data['overall_eta']) + if 'file_progress' in data: data['file_progress'] = {k: RepoFileProgressEvent.from_dict(v) for k, v in data['file_progress'].items()} + + return 
cls(**data) + + +RepoFileProgressCallback = Callable[[RepoFileProgressEvent], Coroutine[Any, Any, None]] +RepoProgressCallback = Callable[[RepoProgressEvent], Coroutine[Any, Any, None]] diff --git a/build/lib/exo/download/hf/__init__.py b/build/lib/exo/download/hf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/build/lib/exo/download/hf/hf_helpers.py b/build/lib/exo/download/hf/hf_helpers.py new file mode 100644 index 000000000..8fd96dc5f --- /dev/null +++ b/build/lib/exo/download/hf/hf_helpers.py @@ -0,0 +1,403 @@ +import asyncio +import aiohttp +import json +import os +from urllib.parse import urljoin +from typing import Callable, Optional, Coroutine, Any, Dict, List, Union, Literal +from datetime import datetime, timedelta +from fnmatch import fnmatch +from pathlib import Path +from typing import Generator, Iterable, TypeVar, TypedDict +from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type +from exo.helpers import DEBUG +from exo.download.download_progress import RepoProgressEvent, RepoFileProgressEvent, RepoProgressCallback, RepoFileProgressCallback +from exo.inference.shard import Shard +import aiofiles +from aiofiles import os as aios + +T = TypeVar("T") + +async def get_local_snapshot_dir(repo_id: str, revision: str = "main") -> Optional[Path]: + refs_dir = get_repo_root(repo_id)/"refs" + refs_file = refs_dir/revision + if await aios.path.exists(refs_file): + async with aiofiles.open(refs_file, 'r') as f: + commit_hash = (await f.read()).strip() + snapshot_dir = get_repo_root(repo_id)/"snapshots"/commit_hash + return snapshot_dir + return None + + +def filter_repo_objects( + items: Iterable[T], + *, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + key: Optional[Callable[[T], str]] = None, +) -> Generator[T, None, None]: + if isinstance(allow_patterns, str): + allow_patterns = [allow_patterns] + if isinstance(ignore_patterns, str): + ignore_patterns = [ignore_patterns] + if allow_patterns is not None: + allow_patterns = [_add_wildcard_to_directories(p) for p in allow_patterns] + if ignore_patterns is not None: + ignore_patterns = [_add_wildcard_to_directories(p) for p in ignore_patterns] + + if key is None: + + def _identity(item: T) -> str: + if isinstance(item, str): + return item + if isinstance(item, Path): + return str(item) + raise ValueError(f"Please provide `key` argument in `filter_repo_objects`: `{item}` is not a string.") + + key = _identity + + for item in items: + path = key(item) + if allow_patterns is not None and not any(fnmatch(path, r) for r in allow_patterns): + continue + if ignore_patterns is not None and any(fnmatch(path, r) for r in ignore_patterns): + continue + yield item + + +def _add_wildcard_to_directories(pattern: str) -> str: + if pattern[-1] == "/": + return pattern + "*" + return pattern + + +def get_hf_home() -> Path: + """Get the Hugging Face home directory.""" + return Path(os.environ.get("HF_HOME", Path.home()/".cache"/"huggingface")) + + +async def get_hf_token(): + """Retrieve the Hugging Face token from the user's HF_HOME directory.""" + token_path = get_hf_home()/"token" + if await aios.path.exists(token_path): + async with aiofiles.open(token_path, 'r') as f: + return (await f.read()).strip() + return None + + +async def get_auth_headers(): + """Get authentication headers if a token is available.""" + token = await get_hf_token() + if token: + return {"Authorization": f"Bearer {token}"} + return {} + + +def 
get_repo_root(repo_id: str) -> Path: + """Get the root directory for a given repo ID in the Hugging Face cache.""" + sanitized_repo_id = repo_id.replace("/", "--") + return get_hf_home()/"hub"/f"models--{sanitized_repo_id}" + + +async def fetch_file_list(session, repo_id, revision, path=""): + api_url = f"https://huggingface.co/api/models/{repo_id}/tree/{revision}" + url = f"{api_url}/{path}" if path else api_url + + headers = await get_auth_headers() + async with session.get(url, headers=headers) as response: + if response.status == 200: + data = await response.json() + files = [] + for item in data: + if item["type"] == "file": + files.append({"path": item["path"], "size": item["size"]}) + elif item["type"] == "directory": + subfiles = await fetch_file_list(session, repo_id, revision, item["path"]) + files.extend(subfiles) + return files + else: + raise Exception(f"Failed to fetch file list: {response.status}") + + +@retry( + stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=60), retry=retry_if_exception_type((aiohttp.ClientError, asyncio.TimeoutError, aiohttp.ClientResponseError)), reraise=True +) +async def download_file( + session: aiohttp.ClientSession, repo_id: str, revision: str, file_path: str, save_directory: str, progress_callback: Optional[RepoFileProgressCallback] = None, use_range_request: bool = True +): + base_url = f"https://huggingface.co/{repo_id}/resolve/{revision}/" + url = urljoin(base_url, file_path) + local_path = os.path.join(save_directory, file_path) + + await aios.makedirs(os.path.dirname(local_path), exist_ok=True) + + # Check if file already exists and get its size + local_file_size = await aios.path.getsize(local_path) if await aios.path.exists(local_path) else 0 + + headers = await get_auth_headers() + if use_range_request: + headers["Range"] = f"bytes={local_file_size}-" + + async with session.get(url, headers=headers) as response: + total_size = int(response.headers.get('Content-Length', 0)) + downloaded_size = local_file_size + downloaded_this_session = 0 + mode = 'ab' if use_range_request else 'wb' + if downloaded_size == total_size: + if DEBUG >= 2: print(f"File already downloaded: {file_path}") + if progress_callback: + await progress_callback(RepoFileProgressEvent(repo_id, revision, file_path, downloaded_size, downloaded_this_session, total_size, 0, timedelta(0), "complete")) + return + + if response.status == 200: + # File doesn't support range requests or we're not using them, start from beginning + mode = 'wb' + downloaded_size = 0 + elif response.status == 206: + # Partial content, resume download + content_range = response.headers.get('Content-Range', '') + try: + total_size = int(content_range.split('/')[-1]) + except ValueError: + if DEBUG >= 1: print(f"Failed to parse Content-Range header: {content_range}. 
Starting download from scratch...") + return await download_file(session, repo_id, revision, file_path, save_directory, progress_callback, use_range_request=False) + elif response.status == 416: + # Range not satisfiable, get the actual file size + content_range = response.headers.get('Content-Range', '') + try: + total_size = int(content_range.split('/')[-1]) + if downloaded_size == total_size: + if DEBUG >= 2: print(f"File fully downloaded on first pass: {file_path}") + if progress_callback: + await progress_callback(RepoFileProgressEvent(repo_id, revision, file_path, downloaded_size, downloaded_this_session, total_size, 0, timedelta(0), "complete")) + return + except ValueError: + if DEBUG >= 1: print(f"Failed to parse Content-Range header: {content_range}. Starting download from scratch...") + return await download_file(session, repo_id, revision, file_path, save_directory, progress_callback, use_range_request=False) + else: + raise aiohttp.ClientResponseError(response.request_info, response.history, status=response.status, message=f"Failed to download {file_path}: {response.status}") + + if downloaded_size == total_size: + print(f"File already downloaded: {file_path}") + if progress_callback: + await progress_callback(RepoFileProgressEvent(repo_id, revision, file_path, downloaded_size, downloaded_this_session, total_size, 0, timedelta(0), "complete")) + return + + DOWNLOAD_CHUNK_SIZE = 32768 + start_time = datetime.now() + async with aiofiles.open(local_path, mode) as f: + async for chunk in response.content.iter_chunked(DOWNLOAD_CHUNK_SIZE): + await f.write(chunk) + downloaded_size += len(chunk) + downloaded_this_session += len(chunk) + if progress_callback and total_size: + elapsed_time = (datetime.now() - start_time).total_seconds() + speed = int(downloaded_this_session/elapsed_time) if elapsed_time > 0 else 0 + remaining_size = total_size - downloaded_size + eta = timedelta(seconds=remaining_size/speed) if speed > 0 else timedelta(0) + status = "in_progress" if downloaded_size < total_size else "complete" + if DEBUG >= 8: print(f"HF repo file download progress: {file_path=} {elapsed_time=} {speed=} Downloaded={downloaded_size}/{total_size} {remaining_size=} {eta=} {status=}") + await progress_callback(RepoFileProgressEvent(repo_id, revision, file_path, downloaded_size, downloaded_this_session, total_size, speed, eta, status)) + if DEBUG >= 2: print(f"Downloaded: {file_path}") + + +async def download_repo_files( + repo_id: str, + revision: str = "main", + progress_callback: Optional[RepoProgressCallback] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + max_parallel_downloads: int = 4 +) -> Path: + repo_root = get_repo_root(repo_id) + refs_dir = repo_root/"refs" + snapshots_dir = repo_root/"snapshots" + cachedreqs_dir = repo_root/"cachedreqs" + + # Ensure directories exist + await aios.makedirs(refs_dir, exist_ok=True) + await aios.makedirs(snapshots_dir, exist_ok=True) + await aios.makedirs(cachedreqs_dir, exist_ok=True) + + # Check if we have a cached commit hash + refs_file = refs_dir/revision + if await aios.path.exists(refs_file): + async with aiofiles.open(refs_file, 'r') as f: + commit_hash = (await f.read()).strip() + if DEBUG >= 2: print(f"Commit hash is already hashed at {refs_file}: {commit_hash}") + else: + async with aiohttp.ClientSession() as session: + # Fetch the commit hash for the given revision + api_url = f"https://huggingface.co/api/models/{repo_id}/revision/{revision}" + headers = 
await get_auth_headers() + async with session.get(api_url, headers=headers) as response: + if response.status != 200: + raise Exception(f"Failed to fetch revision info from {api_url}: {response.status}") + revision_info = await response.json() + commit_hash = revision_info['sha'] + + # Cache the commit hash + async with aiofiles.open(refs_file, 'w') as f: + await f.write(commit_hash) + + # Set up the snapshot directory + snapshot_dir = snapshots_dir/commit_hash + await aios.makedirs(snapshot_dir, exist_ok=True) + + # Set up the cached file list directory + cached_file_list_dir = cachedreqs_dir/commit_hash + await aios.makedirs(cached_file_list_dir, exist_ok=True) + cached_file_list_path = cached_file_list_dir/"fetch_file_list.json" + + async with aiohttp.ClientSession() as session: + # Check if we have a cached file list + if await aios.path.exists(cached_file_list_path): + async with aiofiles.open(cached_file_list_path, 'r') as f: + file_list = json.loads(await f.read()) + if DEBUG >= 2: print(f"Using cached file list from {cached_file_list_path}") + else: + file_list = await fetch_file_list(session, repo_id, revision) + # Cache the file list + async with aiofiles.open(cached_file_list_path, 'w') as f: + await f.write(json.dumps(file_list)) + if DEBUG >= 2: print(f"Cached file list at {cached_file_list_path}") + + filtered_file_list = list(filter_repo_objects(file_list, allow_patterns=allow_patterns, ignore_patterns=ignore_patterns, key=lambda x: x["path"])) + total_files = len(filtered_file_list) + total_bytes = sum(file["size"] for file in filtered_file_list) + file_progress: Dict[str, RepoFileProgressEvent] = { + file["path"]: RepoFileProgressEvent(repo_id, revision, file["path"], 0, 0, file["size"], 0, timedelta(0), "not_started") + for file in filtered_file_list + } + start_time = datetime.now() + + async def download_with_progress(file_info, progress_state): + local_path = snapshot_dir/file_info["path"] + if await aios.path.exists(local_path) and (await aios.stat(local_path)).st_size == file_info["size"]: + if DEBUG >= 2: print(f"File already fully downloaded: {file_info['path']}") + progress_state['completed_files'] += 1 + progress_state['downloaded_bytes'] += file_info["size"] + file_progress[file_info["path"]] = RepoFileProgressEvent(repo_id, revision, file_info["path"], file_info["size"], 0, file_info["size"], 0, timedelta(0), "complete") + if progress_callback: + elapsed_time = (datetime.now() - start_time).total_seconds() + overall_speed = int(progress_state['downloaded_bytes_this_session']/elapsed_time) if elapsed_time > 0 else 0 + remaining_bytes = total_bytes - progress_state['downloaded_bytes'] + overall_eta = timedelta(seconds=remaining_bytes/overall_speed) if overall_speed > 0 else timedelta(seconds=0) + status = "in_progress" if progress_state['completed_files'] < total_files else "complete" + await progress_callback( + RepoProgressEvent( + repo_id, revision, progress_state['completed_files'], total_files, progress_state['downloaded_bytes'], progress_state['downloaded_bytes_this_session'], total_bytes, overall_speed, + overall_eta, file_progress, status + ) + ) + return + + async def file_progress_callback(event: RepoFileProgressEvent): + progress_state['downloaded_bytes'] += event.downloaded - file_progress[event.file_path].downloaded + progress_state['downloaded_bytes_this_session'] += event.downloaded_this_session - file_progress[event.file_path].downloaded_this_session + file_progress[event.file_path] = event + if progress_callback: + elapsed_time = (datetime.now() 
- start_time).total_seconds() + overall_speed = int(progress_state['downloaded_bytes_this_session']/elapsed_time) if elapsed_time > 0 else 0 + remaining_bytes = total_bytes - progress_state['downloaded_bytes'] + overall_eta = timedelta(seconds=remaining_bytes/overall_speed) if overall_speed > 0 else timedelta(seconds=0) + status = "in_progress" if progress_state['downloaded_bytes'] < total_bytes else "complete" + await progress_callback( + RepoProgressEvent( + repo_id, revision, progress_state['completed_files'], total_files, progress_state['downloaded_bytes'], progress_state['downloaded_bytes_this_session'], total_bytes, overall_speed, + overall_eta, file_progress, status + ) + ) + + await download_file(session, repo_id, revision, file_info["path"], snapshot_dir, file_progress_callback) + progress_state['completed_files'] += 1 + file_progress[ + file_info["path"] + ] = RepoFileProgressEvent(repo_id, revision, file_info["path"], file_info["size"], file_progress[file_info["path"]].downloaded_this_session, file_info["size"], 0, timedelta(0), "complete") + if progress_callback: + elapsed_time = (datetime.now() - start_time).total_seconds() + overall_speed = int(progress_state['downloaded_bytes_this_session']/elapsed_time) if elapsed_time > 0 else 0 + remaining_bytes = total_bytes - progress_state['downloaded_bytes'] + overall_eta = timedelta(seconds=remaining_bytes/overall_speed) if overall_speed > 0 else timedelta(seconds=0) + status = "in_progress" if progress_state['completed_files'] < total_files else "complete" + await progress_callback( + RepoProgressEvent( + repo_id, revision, progress_state['completed_files'], total_files, progress_state['downloaded_bytes'], progress_state['downloaded_bytes_this_session'], total_bytes, overall_speed, + overall_eta, file_progress, status + ) + ) + + progress_state = {'completed_files': 0, 'downloaded_bytes': 0, 'downloaded_bytes_this_session': 0} + + semaphore = asyncio.Semaphore(max_parallel_downloads) + + async def download_with_semaphore(file_info): + async with semaphore: + await download_with_progress(file_info, progress_state) + + tasks = [asyncio.create_task(download_with_semaphore(file_info)) for file_info in filtered_file_list] + await asyncio.gather(*tasks) + + return snapshot_dir + + +async def get_weight_map(repo_id: str, revision: str = "main") -> Optional[Dict[str, str]]: + """ + Retrieve the weight map from the model.safetensors.index.json file. + + Args: + repo_id (str): The Hugging Face repository ID. + revision (str): The revision of the repository to use. + + Returns: + Optional[Dict[str, str]]: The weight map if it exists, otherwise None. 
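+
+  Example (illustrative sketch only; "some-org/some-sharded-model" is a
+  hypothetical repo id, not one shipped with exo):
+
+    weight_map = await get_weight_map("some-org/some-sharded-model")
+    if weight_map is not None:
+      # maps tensor names to the .safetensors file that stores them
+      first_file = weight_map.get("model.layers.0.self_attn.q_proj.weight")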
+ """ + + # Download the index file + await download_repo_files(repo_id=repo_id, revision=revision, allow_patterns="model.safetensors.index.json") + + # Check if the file exists + repo_root = get_repo_root(repo_id) + snapshot_dir = repo_root/"snapshots" + index_file = next((f for f in await aios.listdir(snapshot_dir) if f.endswith("model.safetensors.index.json")), None) + + if index_file: + index_file_path = snapshot_dir/index_file + if await aios.path.exists(index_file_path): + async with aiofiles.open(index_file_path, 'r') as f: + index_data = json.loads(await f.read()) + return index_data.get("weight_map") + + return None + + +def extract_layer_num(tensor_name: str) -> Optional[int]: + # This is a simple example and might need to be adjusted based on the actual naming convention + parts = tensor_name.split('.') + for part in parts: + if part.isdigit(): + return int(part) + return None + + +def get_allow_patterns(weight_map: Dict[str, str], shard: Shard) -> List[str]: + default_patterns = [ + "*.json", + "*.py", + "tokenizer.model", + "*.tiktoken", + "*.txt", + ] + shard_specific_patterns = [] + if weight_map: + for tensor_name, filename in weight_map.items(): + layer_num = extract_layer_num(tensor_name) + if layer_num is not None and shard.start_layer <= layer_num <= shard.end_layer: + shard_specific_patterns.append(filename) + sorted_file_names = sorted(weight_map.values()) + if shard.is_first_layer(): + shard_specific_patterns.append(sorted_file_names[0]) + elif shard.is_last_layer(): + shard_specific_patterns.append(sorted_file_names[-1]) + else: + shard_specific_patterns = ["*.safetensors"] + return list(set(default_patterns + shard_specific_patterns)) # Remove duplicates diff --git a/build/lib/exo/download/hf/hf_shard_download.py b/build/lib/exo/download/hf/hf_shard_download.py new file mode 100644 index 000000000..eb562c3c9 --- /dev/null +++ b/build/lib/exo/download/hf/hf_shard_download.py @@ -0,0 +1,77 @@ +import asyncio +import traceback +from pathlib import Path +from typing import Dict, List, Tuple +from exo.inference.shard import Shard +from exo.download.shard_download import ShardDownloader +from exo.download.download_progress import RepoProgressEvent +from exo.download.hf.hf_helpers import download_repo_files, RepoProgressEvent, get_weight_map, get_allow_patterns, get_repo_root +from exo.helpers import AsyncCallbackSystem, DEBUG + + +class HFShardDownloader(ShardDownloader): + def __init__(self, quick_check: bool = False, max_parallel_downloads: int = 4): + self.quick_check = quick_check + self.max_parallel_downloads = max_parallel_downloads + self.active_downloads: Dict[Shard, asyncio.Task] = {} + self.completed_downloads: Dict[Shard, Path] = {} + self._on_progress = AsyncCallbackSystem[str, Tuple[Shard, RepoProgressEvent]]() + + async def ensure_shard(self, shard: Shard) -> Path: + if shard in self.completed_downloads: + return self.completed_downloads[shard] + if self.quick_check: + repo_root = get_repo_root(shard.model_id) + snapshots_dir = repo_root/"snapshots" + if snapshots_dir.exists(): + visible_dirs = [d for d in snapshots_dir.iterdir() if not d.name.startswith('.')] + if visible_dirs: + most_recent_dir = max(visible_dirs, key=lambda x: x.stat().st_mtime) + return most_recent_dir + + # If a download on this shard is already in progress, keep that one + for active_shard in self.active_downloads: + if active_shard == shard: + if DEBUG >= 2: print(f"Download already in progress for {shard}. 
Keeping that one.") + return await self.active_downloads[shard] + + # Cancel any downloads for this model_id on a different shard + existing_active_shards = [active_shard for active_shard in self.active_downloads.keys() if active_shard.model_id == shard.model_id] + for active_shard in existing_active_shards: + if DEBUG >= 2: print(f"Cancelling download for {active_shard} (replacing with {shard})") + task = self.active_downloads[active_shard] + task.cancel() + try: + await task + except asyncio.CancelledError: + pass # This is expected when cancelling a task + except Exception as e: + if DEBUG >= 2: print(f"Error in cancelling download {active_shard}: {e}") + traceback.print_exc() + self.active_downloads = {active_shard: task for active_shard, task in self.active_downloads.items() if active_shard.model_id != shard.model_id} + + # Start new download + download_task = asyncio.create_task(self._download_shard(shard)) + self.active_downloads[shard] = download_task + try: + path = await download_task + self.completed_downloads[shard] = path + return path + finally: + # Ensure the task is removed even if an exception occurs + print(f"Removing download task for {shard}: {shard in self.active_downloads}") + if shard in self.active_downloads: + self.active_downloads.pop(shard) + + async def _download_shard(self, shard: Shard) -> Path: + async def wrapped_progress_callback(event: RepoProgressEvent): + self._on_progress.trigger_all(shard, event) + + weight_map = await get_weight_map(shard.model_id) + allow_patterns = get_allow_patterns(weight_map, shard) + + return await download_repo_files(repo_id=shard.model_id, progress_callback=wrapped_progress_callback, allow_patterns=allow_patterns, max_parallel_downloads=self.max_parallel_downloads) + + @property + def on_progress(self) -> AsyncCallbackSystem[str, Tuple[Shard, RepoProgressEvent]]: + return self._on_progress diff --git a/build/lib/exo/download/shard_download.py b/build/lib/exo/download/shard_download.py new file mode 100644 index 000000000..771fb8683 --- /dev/null +++ b/build/lib/exo/download/shard_download.py @@ -0,0 +1,26 @@ +from abc import ABC, abstractmethod +from typing import Optional, Tuple +from pathlib import Path +from exo.inference.shard import Shard +from exo.download.download_progress import RepoProgressEvent +from exo.helpers import AsyncCallbackSystem + + +class ShardDownloader(ABC): + @abstractmethod + async def ensure_shard(self, shard: Shard) -> Path: + """ + Ensures that the shard is downloaded. + Does not allow multiple overlapping downloads at once. + If you try to download a Shard which overlaps a Shard that is already being downloaded, + the download will be cancelled and a new download will start. + + Args: + shard (Shard): The shard to download. 
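+
+    Returns:
+        Path: the local directory that holds the shard's files once the
+        download (or cache lookup) has completed.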
+ """ + pass + + @property + @abstractmethod + def on_progress(self) -> AsyncCallbackSystem[str, Tuple[Shard, RepoProgressEvent]]: + pass diff --git a/build/lib/exo/helpers.py b/build/lib/exo/helpers.py new file mode 100644 index 000000000..d8a5c6cc2 --- /dev/null +++ b/build/lib/exo/helpers.py @@ -0,0 +1,234 @@ +import os +import asyncio +from typing import Callable, TypeVar, Optional, Dict, Generic, Tuple, List +import socket +import random +import platform +import psutil +import uuid +import netifaces +from pathlib import Path + +DEBUG = int(os.getenv("DEBUG", default="0")) +DEBUG_DISCOVERY = int(os.getenv("DEBUG_DISCOVERY", default="0")) +VERSION = "0.0.1" + +exo_text = r""" + _____ _____ + / _ \ \/ / _ \ +| __/> < (_) | + \___/_/\_\___/ + """ + + +def get_system_info(): + if psutil.MACOS: + if platform.machine() == "arm64": + return "Apple Silicon Mac" + if platform.machine() in ["x86_64", "i386"]: + return "Intel Mac" + return "Unknown Mac architecture" + if psutil.LINUX: + return "Linux" + return "Non-Mac, non-Linux system" + +def find_available_port(host: str = "", min_port: int = 49152, max_port: int = 65535) -> int: + used_ports_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".exo_used_ports") + + def read_used_ports(): + if os.path.exists(used_ports_file): + with open(used_ports_file, "r") as f: + return [int(line.strip()) for line in f if line.strip().isdigit()] + return [] + + def write_used_port(port, used_ports): + with open(used_ports_file, "w") as f: + print(used_ports[-19:]) + for p in used_ports[-19:] + [port]: + f.write(f"{p}\n") + + used_ports = read_used_ports() + available_ports = set(range(min_port, max_port + 1)) - set(used_ports) + + while available_ports: + port = random.choice(list(available_ports)) + if DEBUG >= 2: print(f"Trying to find available port {port=}") + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind((host, port)) + write_used_port(port, used_ports) + return port + except socket.error: + available_ports.remove(port) + + raise RuntimeError("No available ports in the specified range") + + +def print_exo(): + print(exo_text) + + +def print_yellow_exo(): + yellow = "\033[93m" # ANSI escape code for yellow + reset = "\033[0m" # ANSI escape code to reset color + print(f"{yellow}{exo_text}{reset}") + + +def terminal_link(uri, label=None): + if label is None: + label = uri + parameters = "" + + # OSC 8 ; params ; URI ST OSC 8 ;; ST + escape_mask = "\033]8;{};{}\033\\{}\033]8;;\033\\" + + return escape_mask.format(parameters, uri, label) + + +T = TypeVar("T") +K = TypeVar("K") + + +class AsyncCallback(Generic[T]): + def __init__(self) -> None: + self.condition: asyncio.Condition = asyncio.Condition() + self.result: Optional[Tuple[T, ...]] = None + self.observers: list[Callable[..., None]] = [] + + async def wait(self, check_condition: Callable[..., bool], timeout: Optional[float] = None) -> Tuple[T, ...]: + async with self.condition: + await asyncio.wait_for(self.condition.wait_for(lambda: self.result is not None and check_condition(*self.result)), timeout) + assert self.result is not None # for type checking + return self.result + + def on_next(self, callback: Callable[..., None]) -> None: + self.observers.append(callback) + + def set(self, *args: T) -> None: + self.result = args + for observer in self.observers: + observer(*args) + asyncio.create_task(self.notify()) + + async def notify(self) -> None: + async with self.condition: + self.condition.notify_all() + + +class AsyncCallbackSystem(Generic[K, T]): + def 
__init__(self) -> None: + self.callbacks: Dict[K, AsyncCallback[T]] = {} + + def register(self, name: K) -> AsyncCallback[T]: + if name not in self.callbacks: + self.callbacks[name] = AsyncCallback[T]() + return self.callbacks[name] + + def deregister(self, name: K) -> None: + if name in self.callbacks: + del self.callbacks[name] + + def trigger(self, name: K, *args: T) -> None: + if name in self.callbacks: + self.callbacks[name].set(*args) + + def trigger_all(self, *args: T) -> None: + for callback in self.callbacks.values(): + callback.set(*args) + + +K = TypeVar('K', bound=str) +V = TypeVar('V') + + +class PrefixDict(Generic[K, V]): + def __init__(self): + self.items: Dict[K, V] = {} + + def add(self, key: K, value: V) -> None: + self.items[key] = value + + def find_prefix(self, argument: str) -> List[Tuple[K, V]]: + return [(key, value) for key, value in self.items.items() if argument.startswith(key)] + + def find_longest_prefix(self, argument: str) -> Optional[Tuple[K, V]]: + matches = self.find_prefix(argument) + if len(matches) == 0: + return None + + return max(matches, key=lambda x: len(x[0])) + + +def is_valid_uuid(val): + try: + uuid.UUID(str(val)) + return True + except ValueError: + return False + + +def get_or_create_node_id(): + NODE_ID_FILE = Path(os.path.dirname(os.path.abspath(__file__)))/".exo_node_id" + try: + if NODE_ID_FILE.is_file(): + with open(NODE_ID_FILE, "r") as f: + stored_id = f.read().strip() + if is_valid_uuid(stored_id): + if DEBUG >= 2: print(f"Retrieved existing node ID: {stored_id}") + return stored_id + else: + if DEBUG >= 2: print("Stored ID is not a valid UUID. Generating a new one.") + + new_id = str(uuid.uuid4()) + with open(NODE_ID_FILE, "w") as f: + f.write(new_id) + + if DEBUG >= 2: print(f"Generated and stored new node ID: {new_id}") + return new_id + except IOError as e: + if DEBUG >= 2: print(f"IO error creating node_id: {e}") + return str(uuid.uuid4()) + except Exception as e: + if DEBUG >= 2: print(f"Unexpected error creating node_id: {e}") + return str(uuid.uuid4()) + + +def pretty_print_bytes(size_in_bytes: int) -> str: + if size_in_bytes < 1024: + return f"{size_in_bytes} B" + elif size_in_bytes < 1024**2: + return f"{size_in_bytes / 1024:.2f} KB" + elif size_in_bytes < 1024**3: + return f"{size_in_bytes / (1024 ** 2):.2f} MB" + elif size_in_bytes < 1024**4: + return f"{size_in_bytes / (1024 ** 3):.2f} GB" + else: + return f"{size_in_bytes / (1024 ** 4):.2f} TB" + + +def pretty_print_bytes_per_second(bytes_per_second: int) -> str: + if bytes_per_second < 1024: + return f"{bytes_per_second} B/s" + elif bytes_per_second < 1024**2: + return f"{bytes_per_second / 1024:.2f} KB/s" + elif bytes_per_second < 1024**3: + return f"{bytes_per_second / (1024 ** 2):.2f} MB/s" + elif bytes_per_second < 1024**4: + return f"{bytes_per_second / (1024 ** 3):.2f} GB/s" + else: + return f"{bytes_per_second / (1024 ** 4):.2f} TB/s" + + +def get_all_ip_addresses(): + try: + ip_addresses = [] + for interface in netifaces.interfaces(): + ifaddresses = netifaces.ifaddresses(interface) + if netifaces.AF_INET in ifaddresses: + for link in ifaddresses[netifaces.AF_INET]: + ip = link['addr'] + ip_addresses.append(ip) + return list(set(ip_addresses)) + except: + if DEBUG >= 1: print("Failed to get all IP addresses. 
Defaulting to localhost.") + return ["localhost"] diff --git a/build/lib/exo/inference/__init__.py b/build/lib/exo/inference/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/build/lib/exo/inference/debug_inference_engine.py b/build/lib/exo/inference/debug_inference_engine.py new file mode 100644 index 000000000..27bcb592f --- /dev/null +++ b/build/lib/exo/inference/debug_inference_engine.py @@ -0,0 +1,59 @@ +from exo.inference.inference_engine import InferenceEngine +from exo.inference.shard import Shard +from exo.inference.tinygrad.inference import TinygradDynamicShardInferenceEngine +import asyncio +import numpy as np + + +# An inference engine should work the same for any number of Shards, as long as the Shards are continuous. +async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str): + from exo.inference.tinygrad.inference import Tokenizer + from pathlib import Path + + _tokenizer = Tokenizer(str(Path(model_id)/"tokenizer.model")) + + prompt = "In a single word only, what is the last name of the president of the United States? " + resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt("A", shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), prompt=prompt) + next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( + "A", + shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), + input_data=resp_full, + inference_state=inference_state_full, + ) + + resp1, inference_state_1, _ = await inference_engine_1.infer_prompt("B", shard=Shard(model_id=model_id, start_layer=0, end_layer=30, n_layers=32), prompt=prompt) + resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( + "B", + shard=Shard(model_id=model_id, start_layer=31, end_layer=31, n_layers=32), + input_data=resp1, + inference_state=inference_state_1, + ) + resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( + "B", + shard=Shard(model_id=model_id, start_layer=0, end_layer=30, n_layers=32), + input_data=resp2, + inference_state=inference_state_2, + ) + resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( + "B", + shard=Shard(model_id=model_id, start_layer=31, end_layer=31, n_layers=32), + input_data=resp3, + inference_state=inference_state_3, + ) + + print(f"{resp2=}") + print(f"full: {_tokenizer.decode(resp_full)}") + print(f"next full: {_tokenizer.decode(next_resp_full)}") + print(f"resp2: {_tokenizer.decode(resp2)}") + print(f"{resp4=}") + print(f"resp4: {_tokenizer.decode(resp4)}") + + assert np.array_equal(resp_full, resp2) + assert np.array_equal(next_resp_full, resp4) + + +asyncio.run(test_inference_engine( + TinygradDynamicShardInferenceEngine(), + TinygradDynamicShardInferenceEngine(), + "llama3-8b-sfr", +)) diff --git a/build/lib/exo/inference/inference_engine.py b/build/lib/exo/inference/inference_engine.py new file mode 100644 index 000000000..2b98adbe8 --- /dev/null +++ b/build/lib/exo/inference/inference_engine.py @@ -0,0 +1,34 @@ +import numpy as np +import os + +from typing import Tuple, Optional +from abc import ABC, abstractmethod +from .shard import Shard + + +class InferenceEngine(ABC): + @abstractmethod + async def infer_prompt(self, request_id: str, shard: Shard, prompt: str, image_str: Optional[str] = None, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + pass + + @abstractmethod + async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray, 
inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: + pass + + +def get_inference_engine(inference_engine_name: str, shard_downloader: 'ShardDownloader'): + if inference_engine_name == "mlx": + from exo.inference.mlx.sharded_inference_engine import MLXDynamicShardInferenceEngine + + return MLXDynamicShardInferenceEngine(shard_downloader) + elif inference_engine_name == "tinygrad": + from exo.inference.tinygrad.inference import TinygradDynamicShardInferenceEngine + import tinygrad.helpers + tinygrad.helpers.DEBUG.value = int(os.getenv("TINYGRAD_DEBUG", default="0")) + + return TinygradDynamicShardInferenceEngine(shard_downloader) + elif inference_engine_name == "pytorch": + from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine + return PyTorchDynamicShardInferenceEngine(shard_downloader) + else: + raise ValueError(f"Inference engine {inference_engine_name} not supported") diff --git a/build/lib/exo/inference/mlx/__init__.py b/build/lib/exo/inference/mlx/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/build/lib/exo/inference/mlx/models/__init__.py b/build/lib/exo/inference/mlx/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/build/lib/exo/inference/mlx/models/base.py b/build/lib/exo/inference/mlx/models/base.py new file mode 100644 index 000000000..a1f1878cb --- /dev/null +++ b/build/lib/exo/inference/mlx/models/base.py @@ -0,0 +1,9 @@ +from typing import Optional +import mlx.core as mx +import mlx.nn as nn +from mlx_lm.models.base import KVCache + + +class IdentityBlock(nn.Module): + def __call__(self, x: mx.array, mask: Optional[mx.array] = None, cache: Optional[KVCache] = None) -> mx.array: + return x diff --git a/build/lib/exo/inference/mlx/models/deepseek_v2.py b/build/lib/exo/inference/mlx/models/deepseek_v2.py new file mode 100644 index 000000000..9ea271edf --- /dev/null +++ b/build/lib/exo/inference/mlx/models/deepseek_v2.py @@ -0,0 +1,127 @@ +from dataclasses import dataclass, field +from typing import Optional + +import mlx.core as mx +import mlx.nn as nn + +from mlx_lm.models.base import KVCache +from mlx_lm.models.deepseek_v2 import ModelArgs, DeepseekV2DecoderLayer +from .base import IdentityBlock +from exo.inference.shard import Shard + + +@dataclass +class ModelArgs(ModelArgs): + shard: Shard = field(default_factory=lambda: Shard("", 0, 0, 0)) + + def __post_init__(self): + if isinstance(self.shard, Shard): + return + if not isinstance(self.shard, dict): + raise TypeError(f"Expected shard to be a Shard instance or a dict, got {type(self.shard)} instead") + + self.shard = Shard(**self.shard) + + +class DeepseekV2Model(nn.Module): + def __init__(self, config: ModelArgs): + super().__init__() + self.args = config + self.num_hidden_layers = config.num_hidden_layers + self.vocab_size = config.vocab_size + if self.args.shard.is_first_layer(): + self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size) + + self.layers = [] + for i in range(self.num_hidden_layers): + if self.args.shard.start_layer <= i <= self.args.shard.end_layer: + self.layers.append(DeepseekV2DecoderLayer(config, i)) + else: + self.layers.append(IdentityBlock()) + + if self.args.shard.is_last_layer(): + self.norm = nn.RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + + def __call__( + self, + x: mx.array, + cache: Optional[KVCache] = None, + ) -> mx.array: + if self.args.shard.is_first_layer(): + h = self.embed_tokens(x) + else: + h = x + + mask = None + T = h.shape[1] + if T > 1: + mask = 
nn.MultiHeadAttention.create_additive_causal_mask(T) + mask = mask.astype(h.dtype) + + if cache is None: + cache = [None]*len(self.layers) + + for layer, c in zip(self.layers, cache): + h = layer(h, mask, c) + + if self.args.shard.is_last_layer(): + h = self.norm(h) + return h + + +class Model(nn.Module): + def __init__(self, config: ModelArgs): + super().__init__() + self.args = config + self.model_type = config.model_type + self.model = DeepseekV2Model(config) + if self.args.shard.is_last_layer(): + self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + def __call__( + self, + inputs: mx.array, + cache: Optional[KVCache] = None, + ): + out = self.model(inputs, cache) + if self.args.shard.is_last_layer(): + return self.lm_head(out) + return out + + def sanitize(self, weights): + shard_state_dict = {} + + for key, value in weights.items(): + if key.startswith('model.layers.'): + layer_num = int(key.split('.')[2]) + if self.args.shard.start_layer <= layer_num <= self.args.shard.end_layer: + shard_state_dict[key] = value + elif self.args.shard.is_first_layer() and key.startswith('model.embed_tokens'): + shard_state_dict[key] = value + elif self.args.shard.is_last_layer() and (key.startswith('model.norm') or key.startswith('lm_head')): + shard_state_dict[key] = value + + for l in range(self.args.num_hidden_layers): + prefix = f"model.layers.{l}" + for n, m in [("w1", "gate_proj"), ("w2", "down_proj"), ("w3", "up_proj")]: + for k in ["weight", "scales", "biases"]: + if f"{prefix}.mlp.experts.0.{m}.{k}" in shard_state_dict: + to_join = [shard_state_dict.pop(f"{prefix}.mlp.experts.{e}.{m}.{k}") for e in range(self.args.n_routed_experts)] + shard_state_dict[f"{prefix}.mlp.switch_mlp.{m}.{k}"] = mx.stack(to_join) + + return shard_state_dict + + @property + def layers(self): + return self.model.layers + + @property + def head_dim(self): + return ( + self.args.qk_nope_head_dim + self.args.qk_rope_head_dim, + self.args.v_head_dim, + ) + + @property + def n_kv_heads(self): + return self.args.num_key_value_heads diff --git a/build/lib/exo/inference/mlx/models/llama.py b/build/lib/exo/inference/mlx/models/llama.py new file mode 100644 index 000000000..719d6a886 --- /dev/null +++ b/build/lib/exo/inference/mlx/models/llama.py @@ -0,0 +1,125 @@ +from dataclasses import dataclass, field + +import mlx.core as mx +import mlx.nn as nn + +from mlx_lm.models.base import create_attention_mask +from mlx_lm.models.llama import TransformerBlock, ModelArgs + +from ...shard import Shard +from .base import IdentityBlock + + +@dataclass +class ModelArgs(ModelArgs): + shard: Shard = field(default_factory=lambda: Shard("", 0, 0, 0)) + + def __post_init__(self): + super().__post_init__() # Ensure parent initializations are respected + + if isinstance(self.shard, Shard): + return + if not isinstance(self.shard, dict): + raise TypeError(f"Expected shard to be a Shard instance or a dict, got {type(self.shard)} instead") + + self.shard = Shard(**self.shard) + + +class LlamaModel(nn.Module): + def __init__(self, args: ModelArgs): + super().__init__() + self.args = args + self.vocab_size = args.vocab_size + self.num_hidden_layers = args.num_hidden_layers + assert self.vocab_size > 0 + if self.args.shard.is_first_layer(): + self.embed_tokens = nn.Embedding(args.vocab_size, args.hidden_size) + self.layers = [] + for i in range(self.num_hidden_layers): + if self.args.shard.start_layer <= i <= self.args.shard.end_layer: + self.layers.append(TransformerBlock(args=args)) + else: + 
self.layers.append(IdentityBlock()) + if self.args.shard.is_last_layer(): + self.norm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps) + + def __call__( + self, + inputs: mx.array, + cache=None, + ): + if self.args.shard.is_first_layer(): + h = self.embed_tokens(inputs) + else: + h = inputs + + mask = None + if h.shape[1] > 1: + mask = create_attention_mask(h, cache) + + if cache is None: + cache = [None]*len(self.layers) + + for layer, c in zip(self.layers, cache): + h = layer(h, mask, cache=c) + + if self.args.shard.is_last_layer(): + h = self.norm(h) + return h + + +class Model(nn.Module): + def __init__(self, args: ModelArgs): + super().__init__() + self.args = args + self.model_type = args.model_type + self.model = LlamaModel(args) + if self.args.shard.is_last_layer(): + if not args.tie_word_embeddings: + self.lm_head = nn.Linear(args.hidden_size, args.vocab_size, bias=False) + + def __call__( + self, + inputs: mx.array, + cache=None, + ): + out = self.model(inputs, cache) + if self.args.shard.is_last_layer(): + if self.args.tie_word_embeddings: + out = self.model.embed_tokens.as_linear(out) + else: + out = self.lm_head(out) + return out + + def sanitize(self, weights): + shard_state_dict = {} + + for key, value in weights.items(): + if "self_attn.rotary_emb.inv_freq" in key: + continue + if key.startswith('model.layers.'): + layer_num = int(key.split('.')[2]) + if self.args.shard.start_layer <= layer_num <= self.args.shard.end_layer: + shard_state_dict[key] = value + elif self.args.shard.is_first_layer() and key.startswith('model.embed_tokens'): + shard_state_dict[key] = value + elif (self.args.shard.is_last_layer() and self.args.tie_word_embeddings) and key.startswith('model.embed_tokens'): + shard_state_dict[key] = value + elif (self.args.shard.is_last_layer() and not self.args.tie_word_embeddings) and key.startswith('lm_head'): + shard_state_dict[key] = value + elif self.args.shard.is_last_layer() and (key.startswith('model.norm')): + shard_state_dict[key] = value + + return shard_state_dict + + @property + def layers(self): + return self.model.layers + + @property + def head_dim(self): + return (self.args.head_dim or self.args.hidden_size // self.args.num_attention_heads) + + @property + def n_kv_heads(self): + return self.args.num_key_value_heads diff --git a/build/lib/exo/inference/mlx/models/llava.py b/build/lib/exo/inference/mlx/models/llava.py new file mode 100644 index 000000000..b734b09b4 --- /dev/null +++ b/build/lib/exo/inference/mlx/models/llava.py @@ -0,0 +1,585 @@ +# Copyright © 2024 Apple Inc. 
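+#
+# Illustrative note (not part of the original file): the VisionConfig and
+# TextConfig dataclasses defined below are built from Hugging Face-style
+# config dicts via their from_dict() classmethods, which silently drop any
+# keys that are not dataclass fields. A hypothetical example:
+#
+#   vision_cfg = VisionConfig.from_dict({
+#     "model_type": "clip_vision_model",  # required; it has no default
+#     "hidden_size": 1024,
+#     "torch_dtype": "float16",           # not a field, so it is ignored
+#   })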
+ +import math +import inspect +from dataclasses import dataclass, field +from typing import Optional, Dict, Union + +import mlx.core as mx +import mlx.nn as nn +from mlx_lm.models.base import BaseModelArgs, KVCache +from exo.inference.shard import Shard +from .base import IdentityBlock +import numpy as np + + +@dataclass +class VisionConfig: + model_type: str + num_hidden_layers: int = 24 + hidden_size: int = 1024 + intermediate_size: int = 4096 + num_attention_heads: int = 16 + image_size: int = 336 + patch_size: int = 14 + projection_dim: int = 768 + vocab_size: int = 32000 + num_channels: int = 3 + layer_norm_eps: float = 1e-5 + + @classmethod + def from_dict(cls, params): + return cls(**{k: v for k, v in params.items() if k in inspect.signature(cls).parameters}) + + +class VisionAttention(nn.Module): + def __init__( + self, + dims: int, + num_heads: int, + query_input_dims: Optional[int] = None, + key_input_dims: Optional[int] = None, + value_input_dims: Optional[int] = None, + value_dims: Optional[int] = None, + value_output_dims: Optional[int] = None, + bias: bool = False, + ): + super().__init__() + + if (dims % num_heads) != 0: + raise ValueError("The input feature dimensions should be divisible by the " + f"number of heads ({dims} % {num_heads}) != 0") + + query_input_dims = query_input_dims or dims + key_input_dims = key_input_dims or dims + value_input_dims = value_input_dims or key_input_dims + value_dims = value_dims or dims + value_output_dims = value_output_dims or dims + + self.num_heads = num_heads + self.q_proj = nn.Linear(query_input_dims, dims, bias=bias) + self.k_proj = nn.Linear(key_input_dims, dims, bias=bias) + self.v_proj = nn.Linear(value_input_dims, value_dims, bias=bias) + self.out_proj = nn.Linear(value_dims, value_output_dims, bias=bias) + + def __call__(self, queries, keys, values, mask=None): + queries = self.q_proj(queries) + keys = self.k_proj(keys) + values = self.v_proj(values) + + num_heads = self.num_heads + B, L, D = queries.shape + _, S, _ = keys.shape + queries = queries.reshape(B, L, num_heads, -1).transpose(0, 2, 1, 3) + keys = keys.reshape(B, S, num_heads, -1).transpose(0, 2, 3, 1) + values = values.reshape(B, S, num_heads, -1).transpose(0, 2, 1, 3) + + scale = math.sqrt(1/queries.shape[-1]) + scores = (queries*scale) @ keys + if mask is not None: + scores = scores + mask.astype(scores.dtype) + scores = mx.softmax(scores, axis=-1) + values_hat = (scores @ values).transpose(0, 2, 1, 3).reshape(B, L, -1) + + return self.out_proj(values_hat) + + +class VisionMLP(nn.Module): + def __init__(self, config: VisionConfig): + super().__init__() + self.activation_fn = nn.GELU(approx="fast") + self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size) + self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size) + + def __call__(self, x: mx.array) -> mx.array: + x = self.activation_fn(self.fc1(x)) + x = self.fc2(x) + return x + + +class VisionEncoderLayer(nn.Module): + def __init__(self, config: VisionConfig): + super().__init__() + self.embed_dim = config.hidden_size + self.self_attn = VisionAttention(config.hidden_size, config.num_attention_heads, bias=True) + self.layer_norm1 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) + self.mlp = VisionMLP(config) + self.layer_norm2 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) + + def __call__(self, x: mx.array, mask: Optional[mx.array] = None) -> mx.array: + y = self.layer_norm1(x) + y = self.self_attn(y, y, y, mask) + x = x + y + y = self.layer_norm2(x) + y = self.mlp(y) + 
return x + y + + +class VisionEncoder(nn.Module): + def __init__(self, config: VisionConfig): + super().__init__() + self.layers = [VisionEncoderLayer(config) for _ in range(config.num_hidden_layers)] + + +class VisionEmbeddings(nn.Module): + def __init__(self, config: VisionConfig): + super().__init__() + self.config = config + self.embed_dim = config.hidden_size + self.image_size = config.image_size + self.patch_size = config.patch_size + + self.class_embedding = mx.zeros((config.hidden_size,)) + + self.patch_embedding = nn.Conv2d( + in_channels=config.num_channels, + out_channels=self.embed_dim, + kernel_size=self.patch_size, + stride=self.patch_size, + bias=False, + ) + + self.num_patches = (self.image_size // self.patch_size)**2 + self.num_positions = self.num_patches + 1 + self.position_embedding = nn.Embedding(self.num_positions, self.embed_dim) + + def __call__(self, x: mx.array) -> mx.array: + batch_size = x.shape[0] + patch_embeddings = self.patch_embedding(x) + patch_embeddings = mx.flatten(patch_embeddings, start_axis=1, end_axis=2) + embed_dim = patch_embeddings.shape[-1] + cls_embeddings = mx.broadcast_to(self.class_embedding, (batch_size, 1, embed_dim)) + embeddings = mx.concatenate((cls_embeddings, patch_embeddings), axis=1) + embeddings += self.position_embedding.weight + return embeddings + + +class ClipVisionModel(nn.Module): + def __init__(self, config: VisionConfig): + super().__init__() + self.embeddings = VisionEmbeddings(config) + self.pre_layrnorm = nn.LayerNorm(config.hidden_size) + self.encoder = VisionEncoder(config) + self.post_layernorm = nn.LayerNorm(config.hidden_size) + + def __call__( + self, + x: mx.array, + output_hidden_states: Optional[bool] = None, + ) -> mx.array: + x = self.embeddings(x) + x = self.pre_layrnorm(x) + + encoder_states = (x,) if output_hidden_states else None + + for l in self.encoder.layers: + x = l(x, mask=None) + if output_hidden_states: + encoder_states = encoder_states + (x,) + + pooler_output = self.post_layernorm(x[:, 0, :]) + return pooler_output, x, encoder_states + + +class VisionModel(nn.Module): + def __init__(self, config: VisionConfig): + super().__init__() + + self.model_type = config.model_type + if self.model_type != "clip_vision_model": + raise ValueError(f"Unsupported model type: {self.model_type}") + + self.vision_model = ClipVisionModel(config) + + def __call__(self, x: mx.array, output_hidden_states: Optional[bool] = None) -> mx.array: + return self.vision_model(x, output_hidden_states) + + def sanitize(self, weights): + sanitized_weights = {} + for k, v in weights.items(): + if "position_ids" in k: + # Remove unused position_ids + continue + elif "patch_embedding.weight" in k: + # PyTorch conv2d weight tensors have shape: + # [out_channels, in_channels, kH, KW] + # MLX conv2d expects the weight be of shape: + # [out_channels, kH, KW, in_channels] + sanitized_weights[k] = v.transpose(0, 2, 3, 1) + else: + sanitized_weights[k] = v + + return sanitized_weights + + +@dataclass +class TextConfig: + model_type: str + hidden_size: int = 4096 + num_hidden_layers: int = 32 + intermediate_size: int = 11008 + num_attention_heads: int = 32 + head_dim: int = None + rms_norm_eps: float = 1e-6 + vocab_size: int = 32000 + num_key_value_heads: int = None + rope_theta: float = 10000 + rope_traditional: bool = False + rope_scaling: Optional[Dict[str, Union[float, str]]] = None + + @classmethod + def from_dict(cls, params): + return cls(**{k: v for k, v in params.items() if k in inspect.signature(cls).parameters}) + + def 
__post_init__(self): + if self.num_key_value_heads is None: + self.num_key_value_heads = self.num_attention_heads + + if self.head_dim is None: + self.head_dim = self.hidden_size // self.num_attention_heads + + if self.model_type is None: + self.model_type = "llama" + + if self.rope_scaling: + required_keys = {"factor", "type"} + if not all(key in self.rope_scaling for key in required_keys): + raise ValueError(f"rope_scaling must contain keys {required_keys}") + + if self.rope_scaling["type"] != "linear": + raise ValueError("rope_scaling 'type' currently only supports 'linear'") + + +class TextAttention(nn.Module): + def __init__(self, config: TextConfig): + super().__init__() + + dim = config.hidden_size + self.n_heads = n_heads = config.num_attention_heads + self.n_kv_heads = n_kv_heads = config.num_key_value_heads + + self.repeats = n_heads // n_kv_heads + + head_dim = config.hidden_size // n_heads + self.scale = head_dim**-0.5 + + self.q_proj = nn.Linear(dim, n_heads*head_dim, bias=False) + self.k_proj = nn.Linear(dim, n_kv_heads*head_dim, bias=False) + self.v_proj = nn.Linear(dim, n_kv_heads*head_dim, bias=False) + self.o_proj = nn.Linear(n_heads*head_dim, dim, bias=False) + + rope_scale = (1/config.rope_scaling["factor"] if config.rope_scaling is not None and config.rope_scaling["type"] == "linear" else 1) + self.rope = nn.RoPE( + head_dim, + traditional=config.rope_traditional, + base=config.rope_theta, + scale=rope_scale, + ) + + def __call__( + self, + x: mx.array, + mask: Optional[mx.array] = None, + cache: Optional[KVCache] = None, + ) -> mx.array: + B, L, D = x.shape + + queries, keys, values = self.q_proj(x), self.k_proj(x), self.v_proj(x) + + # Prepare the queries, keys and values for the attention computation + queries = queries.reshape(B, L, self.n_heads, -1).transpose(0, 2, 1, 3) + keys = keys.reshape(B, L, self.n_kv_heads, -1).transpose(0, 2, 1, 3) + values = values.reshape(B, L, self.n_kv_heads, -1).transpose(0, 2, 1, 3) + + if cache is not None: + queries = self.rope(queries, offset=cache.offset) + keys = self.rope(keys, offset=cache.offset) + keys, values = cache.update_and_fetch(keys, values) + else: + queries = self.rope(queries) + keys = self.rope(keys) + + output = mx.fast.scaled_dot_product_attention(queries, keys, values, scale=self.scale, mask=mask) + output = output.transpose(0, 2, 1, 3).reshape(B, L, -1) + return self.o_proj(output) + + +class TextMLP(nn.Module): + def __init__(self, dim, hidden_dim): + super().__init__() + self.gate_proj = nn.Linear(dim, hidden_dim, bias=False) + self.down_proj = nn.Linear(hidden_dim, dim, bias=False) + self.up_proj = nn.Linear(dim, hidden_dim, bias=False) + + def __call__(self, x) -> mx.array: + return self.down_proj(nn.silu(self.gate_proj(x))*self.up_proj(x)) + + +class TransformerBlock(nn.Module): + def __init__(self, config: TextConfig): + super().__init__() + self.num_attention_heads = config.num_attention_heads + self.hidden_size = config.hidden_size + self.self_attn = TextAttention(config) + self.mlp = TextMLP(config.hidden_size, config.intermediate_size) + self.input_layernorm = nn.RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + self.post_attention_layernorm = nn.RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + self.config = config + + def __call__( + self, + x: mx.array, + mask: Optional[mx.array] = None, + cache: Optional[KVCache] = None, + ) -> mx.array: + r = self.self_attn(self.input_layernorm(x), mask, cache) + h = x + r + r = self.mlp(self.post_attention_layernorm(h)) + out = h + r + return out + + 
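+# Illustrative sketch (comments only, not executed): how a single TransformerBlock
+# is driven by the sharded Llama container below. The example shapes and the
+# KVCache constructor signature are assumptions based on mlx_lm's base module,
+# not something introduced by this change.
+#
+#   config = TextConfig(model_type="llama")
+#   block = TransformerBlock(config)
+#   cache = KVCache(config.head_dim, config.num_key_value_heads)
+#   x = mx.zeros((1, 8, config.hidden_size))   # (batch, sequence, hidden)
+#   y = block(x, mask=None, cache=cache)       # same shape as x
+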
+class Llama(nn.Module): + def __init__(self, config: TextConfig, shard: Shard): + super().__init__() + self.config = config + self.shard = shard + self.vocab_size = config.vocab_size + self.model_type = config.model_type + self.num_hidden_layers = config.num_hidden_layers + self.num_key_value_heads = config.num_key_value_heads + self.head_dim = config.head_dim + assert self.vocab_size > 0 + if self.shard.is_first_layer(): + self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size) + self.layers = [] + for i in range(self.num_hidden_layers): + if self.shard.start_layer <= i <= self.shard.end_layer: + self.layers.append(TransformerBlock(config=config)) + else: + self.layers.append(IdentityBlock()) + if self.shard.is_last_layer(): + self.norm = nn.RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + + def __call__( + self, + inputs: mx.array, + cache=None, + inputs_embeds=None, + ): + # for passing merged input embeddings + if inputs_embeds is None: + if self.shard.is_first_layer(): + h = self.embed_tokens(inputs) + else: + h = inputs + else: + h = inputs_embeds + + mask = None + if h.shape[1] > 1: + mask = nn.MultiHeadAttention.create_additive_causal_mask(h.shape[1]) + mask = mask.astype(h.dtype) + + if cache is None: + cache = [None]*len(self.layers) + + for layer, c in zip(self.layers, cache): + h = layer(h, mask, c) + + if self.shard.is_last_layer(): + h = self.norm(h) + return h + + +class LanguageModel(nn.Module): + def __init__(self, config: TextConfig, shard: Shard): + super().__init__() + self.model_type = config.model_type + if self.model_type != "llama": + raise ValueError(f"Model type {self.model_type} not supported. Currently only 'llama' is supported") + self.shard = shard + self.model = Llama(config, shard) + if self.shard.is_last_layer(): + self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + def __call__( + self, + inputs: mx.array, + cache=None, + inputs_embeds=None, + ): + out = self.model(inputs, cache, inputs_embeds) + if self.shard.is_last_layer(): + out = self.lm_head(out) + return out + + def sanitize(self, weights): + shard_state_dict = {} + for key, value in weights.items(): + if "self_attn.rotary_emb.inv_freq" in key: + continue + + if key.startswith('language_model.model.layers.'): + layer_num = int(key.split('.')[3]) + if layer_num < self.shard.start_layer or layer_num > self.shard.end_layer: + continue + if not self.shard.is_first_layer() and key.startswith('language_model.model.embed_tokens'): + continue + elif not self.shard.is_last_layer() and (key.startswith('language_model.model.norm') or key.startswith('language_model.lm_head')): + continue + + shard_state_dict[key] = value + + return shard_state_dict + + +@dataclass +class LlaVAConfig(BaseModelArgs): + text_config: TextConfig + vision_config: VisionConfig = None + model_type: str = "llava" + ignore_index: int = -100 + image_token_index: int = 32000 + vision_feature_select_strategy: str = "default" + vision_feature_layer: int = -2 + vocab_size: int = 32000 + + @classmethod + def from_dict(cls, params): + updated_params = {} + class_params = inspect.signature(cls).parameters + for k, v in params.items(): + if k in class_params: + if k in ["text_config", "vision_config"]: + v = class_params[k].annotation.from_dict(v) + updated_params.update({k: v}) + + return cls(**updated_params) + + +@dataclass +class ModelArgs(LlaVAConfig): + shard: Shard = field(default_factory=lambda: Shard("", 0, 0, 0)) + + def __post_init__(self): + if isinstance(self.shard, dict): + 
self.shard = Shard(**self.shard) + + if not isinstance(self.shard, Shard): + raise TypeError(f"Expected shard to be a Shard instance or a dict, got {type(self.shard)} instead") + + if not self.shard.is_first_layer(): + self.vision_config = None + + +class LlavaMultiModalProjector(nn.Module): + def __init__(self, config: LlaVAConfig): + super().__init__() + self.linear_1 = nn.Linear(config.vision_config.hidden_size, config.text_config.hidden_size, bias=True) + self.gelu = nn.GELU() + self.linear_2 = nn.Linear(config.text_config.hidden_size, config.text_config.hidden_size, bias=True) + + def __call__(self, x: mx.array) -> mx.array: + x = self.linear_1(x) + x = self.gelu(x) + x = self.linear_2(x) + return x + + +class Model(nn.Module): + def __init__(self, config: ModelArgs): + super().__init__() + self.config = config + self.model_type = config.model_type + if config.vision_config: + self.vision_tower = VisionModel(config.vision_config) + self.multi_modal_projector = LlavaMultiModalProjector(config) + self.vision_feature_layer = config.vision_feature_layer + self.vision_feature_select_strategy = config.vision_feature_select_strategy + self.language_model = LanguageModel(config.text_config, config.shard) + + def get_input_embeddings( + self, + input_ids: Optional[mx.array] = None, + pixel_values: Optional[mx.array] = None, + ): + if pixel_values is None: + return self.language_model(input_ids) + + # Get the input embeddings from the language model + inputs_embeds = self.language_model.model.embed_tokens(input_ids) + + # Get the ouptut hidden states from the vision model + *_, hidden_states = self.vision_tower(pixel_values.transpose(0, 2, 3, 1), output_hidden_states=True) + + # Select the hidden states from the desired layer + selected_image_feature = hidden_states[self.vision_feature_layer] + + if self.vision_feature_select_strategy == "default": + selected_image_feature = selected_image_feature[:, 1:] + elif self.vision_feature_select_strategy == "full": + selected_image_feature = selected_image_feature + else: + raise ValueError("Unexpected feature selection strategy: " + f"{self.vision_feature_select_strategy}") + + # Pass image features through the multi-modal projector + image_features = self.multi_modal_projector(selected_image_feature) + + # Insert special image tokens in the input_ids + final_inputs_embeds = self._merge_input_ids_with_image_features(image_features, inputs_embeds, input_ids) + return final_inputs_embeds + + def _merge_input_ids_with_image_features(self, image_features, inputs_embeds, input_ids): + image_token_index = self.config.image_token_index + num_images, num_image_patches, embed_dim = image_features.shape + + # Positions of tokens in input_ids, assuming batch size is 1 + image_positions = np.where(input_ids[0] == image_token_index)[0].tolist() + + if len(image_positions) != num_images: + raise ValueError(f"The number of image tokens ({len(image_positions)}) does not " + f" match the number of image inputs ({num_images}).") + + text_segments = [] + start_idx = 0 + + for position in image_positions: + text_segments.append(inputs_embeds[:, start_idx:position]) + start_idx = position + 1 + + image_embeddings = mx.split(image_features, image_features.shape[0]) + final_embeddings = [v for p in zip(text_segments, image_embeddings) for v in p] + final_embeddings += [inputs_embeds[:, start_idx:]] + + # Create a final embedding of shape + # (1, num_image_patches*num_images + sequence_len, embed_dim) + return mx.concatenate(final_embeddings, axis=1) + + def __call__(self, 
input_ids: mx.array, pixel_values: mx.array = None, cache=None): + input_embddings = None + if pixel_values is not None: + input_embddings = self.get_input_embeddings(input_ids, pixel_values) + logits = self.language_model(input_ids, cache=cache, inputs_embeds=input_embddings) + return logits + + def sanitize(self, weights): + if self.config.vision_config: + weights = self.vision_tower.sanitize(weights) + else: + weights = {k: v for k, v in weights.items() if not k.startswith(('vision_tower', 'multi_modal_projector', 'vision_feature_layer', 'vision_feature_select_strategy'))} + weights = self.language_model.sanitize(weights) + return weights + + @property + def layers(self): + return self.language_model.model.layers + + @property + def head_dim(self): + return (self.language_model.model.head_dim or self.language_model.model.hidden_size // self.language_model.model.num_attention_heads) + + @property + def n_kv_heads(self): + return self.language_model.model.num_key_value_heads diff --git a/build/lib/exo/inference/mlx/sharded_inference_engine.py b/build/lib/exo/inference/mlx/sharded_inference_engine.py new file mode 100644 index 000000000..40cabfeb6 --- /dev/null +++ b/build/lib/exo/inference/mlx/sharded_inference_engine.py @@ -0,0 +1,40 @@ +import numpy as np +import mlx.core as mx +from ..inference_engine import InferenceEngine +from .sharded_model import StatefulShardedModel +from .sharded_utils import load_shard, get_image_from_str +from ..shard import Shard +from typing import Optional +from exo.download.shard_download import ShardDownloader + + +class MLXDynamicShardInferenceEngine(InferenceEngine): + def __init__(self, shard_downloader: ShardDownloader): + self.shard = None + self.shard_downloader = shard_downloader + + async def infer_prompt(self, request_id: str, shard: Shard, prompt: str, image_str: Optional[str] = None, inference_state: Optional[str] = None) -> (np.ndarray, str, bool): + await self.ensure_shard(shard) + if image_str: + image = await get_image_from_str(image_str) + inputs = self.tokenizer(prompt, image, return_tensors="np") + pixel_values = mx.array(inputs["pixel_values"]) + input_ids = mx.array(inputs["input_ids"]) + output_data: np.ndarray = np.array(self.stateful_sharded_model.step(request_id, input_ids, pixel_values)) + else: + output_data: np.ndarray = np.array(self.stateful_sharded_model.step(request_id, mx.array(self.tokenizer.encode(prompt)))) + return output_data, "", output_data.size == 1 and output_data.item() == self.tokenizer.eos_token_id + + async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray, inference_state: Optional[str] = None) -> (np.ndarray, str, bool): + await self.ensure_shard(shard) + output_data: np.ndarray = np.array(self.stateful_sharded_model.step(request_id, mx.array(input_data))) + return output_data, "", output_data.size == 1 and output_data.item() == self.tokenizer.eos_token_id + + async def ensure_shard(self, shard: Shard): + if self.shard == shard: + return + + model_path = await self.shard_downloader.ensure_shard(shard) + model_shard, self.tokenizer = await load_shard(model_path, shard) + self.stateful_sharded_model = StatefulShardedModel(shard, model_shard) + self.shard = shard diff --git a/build/lib/exo/inference/mlx/sharded_model.py b/build/lib/exo/inference/mlx/sharded_model.py new file mode 100644 index 000000000..c4570fbf6 --- /dev/null +++ b/build/lib/exo/inference/mlx/sharded_model.py @@ -0,0 +1,86 @@ +from typing import Dict, Generator, Optional, Tuple +from collections import OrderedDict + 
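+# Usage sketch (illustrative; the model path below is a placeholder, not a real
+# checkpoint): StatefulShardedModel keeps one KV cache per request_id and evicts
+# the least recently used entry once max_caches is exceeded.
+#
+#   model_shard, tokenizer = await load_shard("path/to/model", shard)  # sharded_utils
+#   stateful = StatefulShardedModel(shard, model_shard)
+#   token = stateful.step("request-1", mx.array(tokenizer.encode("hello")))
+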
+import mlx.core as mx +import mlx.nn as nn +from mlx_lm.models.base import KVCache, RotatingKVCache +from mlx_lm.sample_utils import top_p_sampling + +from ..shard import Shard + + +class StatefulShardedModel: + def __init__(self, shard: Shard, model: nn.Module, max_kv_size: int = 1024, max_caches: int = 2): + self.shard = shard + self.model = model + self.max_kv_size = max_kv_size + self.max_caches = max_caches + self.caches = OrderedDict() + + def step( + self, + request_id: str, + x, + pixel_values=None, + temp: float = 0.0, + top_p: float = 1.0, + logit_bias: Optional[Dict[int, float]] = None, + ) -> Generator[Tuple[mx.array, mx.array], None, None]: + def sample(logits: mx.array) -> Tuple[mx.array, float]: + if logit_bias: + indices = mx.array(list(logit_bias.keys())) + values = mx.array(list(logit_bias.values())) + logits[:, indices] += values + + if temp == 0: + token = mx.argmax(logits, axis=-1) + else: + if top_p > 0 and top_p < 1.0: + token = top_p_sampling(logits, top_p, temp) + else: + token = mx.random.categorical(logits*(1/temp)) + + return token + + y = x + + if request_id not in self.caches: + self.init_cache(request_id) + else: + self.caches.move_to_end(request_id) + + cache = self.caches[request_id] + + if pixel_values is None: + output = self.model(y[None] if self.shard.is_first_layer() else y, cache=cache) + else: + output = self.model(y, pixel_values=pixel_values, cache=cache) + + if self.shard.is_last_layer(): + logits = output[:, -1, :] + y = sample(logits) + return y + else: + return output + + def __call__( + self, + request_id: str, + x, + temp: float = 0.0, + top_p: float = 1.0, + logit_bias: Optional[Dict[int, float]] = None, + ) -> Generator[Tuple[mx.array, mx.array], None, None]: + return self.step(request_id, x, temp=temp, top_p=top_p, logit_bias=logit_bias) + + def init_cache(self, request_id: str): + kv_heads = ([self.model.n_kv_heads]*len(self.model.layers) if isinstance(self.model.n_kv_heads, int) else self.model.n_kv_heads) + if self.max_kv_size is not None: + cache = [RotatingKVCache(self.model.head_dim, n, max_size=self.max_kv_size, keep=4) for n in kv_heads] + else: + cache = [KVCache(self.model.head_dim, n) for n in kv_heads] + + if len(self.caches) >= self.max_caches: + self.caches.popitem(last=False) + + self.caches[request_id] = cache diff --git a/build/lib/exo/inference/mlx/sharded_utils.py b/build/lib/exo/inference/mlx/sharded_utils.py new file mode 100644 index 000000000..7fa38eaa6 --- /dev/null +++ b/build/lib/exo/inference/mlx/sharded_utils.py @@ -0,0 +1,207 @@ +# Adapted from https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/utils.py + +import glob +import importlib +import json +import logging +import asyncio +import aiohttp +from functools import partial +from pathlib import Path +from typing import Optional, Tuple, Union, List, Callable +from PIL import Image +from io import BytesIO +import base64 + +import mlx.core as mx +import mlx.nn as nn +from transformers import AutoProcessor + +from mlx_lm.tokenizer_utils import load_tokenizer, TokenizerWrapper +from mlx_lm.tuner.utils import apply_lora_layers + +from exo import DEBUG +from ..shard import Shard + + +class ModelNotFoundError(Exception): + def __init__(self, message): + self.message = message + super().__init__(self.message) + + +MODEL_REMAPPING = { + "mistral": "llama", # mistral is compatible with llama + "phi-msft": "phixtral", +} + + +def _get_classes(config: dict): + """ + Retrieve the model and model args classes based on the configuration. 
+ + Args: + config (dict): The model configuration. + + Returns: + A tuple containing the Model class and the ModelArgs class. + """ + model_type = config["model_type"] + model_type = MODEL_REMAPPING.get(model_type, model_type) + try: + arch = importlib.import_module(f"exo.inference.mlx.models.{model_type}") + except ImportError: + msg = f"Model type {model_type} not supported." + logging.error(msg) + raise ValueError(msg) + + return arch.Model, arch.ModelArgs + + +def load_config(model_path: Path) -> dict: + try: + with open(model_path/"config.json", "r") as f: + config = json.load(f) + except FileNotFoundError: + logging.error(f"Config file not found in {model_path}") + raise + return config + + +def load_model_shard( + model_path: Path, + shard: Shard, + lazy: bool = False, + model_config: dict = {}, +) -> nn.Module: + """ + Load and initialize the model from a given path. + + Args: + model_path (Path): The path to load the model from. + lazy (bool): If False eval the model parameters to make sure they are + loaded in memory before returning, otherwise they will be loaded + when needed. Default: ``False`` + model_config(dict, optional): Configuration parameters for the model. + Defaults to an empty dictionary. + + Returns: + nn.Module: The loaded and initialized model. + + Raises: + FileNotFoundError: If the weight files (.safetensors) are not found. + ValueError: If the model class or args class are not found or cannot be instantiated. + """ + config = load_config(model_path) + config.update(model_config) + + # TODO hack + config["shard"] = { + "model_id": model_path.name, + "start_layer": shard.start_layer, + "end_layer": shard.end_layer, + "n_layers": shard.n_layers, + } + + weight_files = glob.glob(str(model_path/"model*.safetensors")) + + if not weight_files: + # Try weight for back-compat + weight_files = glob.glob(str(model_path/"weight*.safetensors")) + + if not weight_files: + logging.error(f"No safetensors found in {model_path}") + raise FileNotFoundError(f"No safetensors found in {model_path}") + + weights = {} + for wf in sorted(weight_files): + if DEBUG >= 8: + layer_nums = set() + for k in mx.load(wf): + if k.startswith("model.layers."): + layer_num = int(k.split(".")[2]) + layer_nums.add(layer_num) + if k.startswith("language_model.model.layers."): + layer_num = int(k.split(".")[3]) + layer_nums.add(layer_num) + print(f"\"{wf.split('/')[-1]}\": {sorted(layer_nums)},") + + weights.update(mx.load(wf)) + + model_class, model_args_class = _get_classes(config=config) + + model_args = model_args_class.from_dict(config) + model = model_class(model_args) + + if hasattr(model, "sanitize"): + weights = model.sanitize(weights) + + if (quantization := config.get("quantization", None)) is not None: + # Handle legacy models which may not have everything quantized + def class_predicate(p, m): + if not hasattr(m, "to_quantized"): + return False + return f"{p}.scales" in weights + + nn.quantize( + model, + **quantization, + class_predicate=class_predicate, + ) + + model.load_weights(list(weights.items()), strict=True) + + if not lazy: + mx.eval(model.parameters()) + + model.eval() + return model + + +async def load_shard( + model_path: str, + shard: Shard, + tokenizer_config={}, + model_config={}, + adapter_path: Optional[str] = None, + lazy: bool = False, +) -> Tuple[nn.Module, TokenizerWrapper]: + model = load_model_shard(model_path, shard, lazy, model_config) + if adapter_path is not None: + model = apply_lora_layers(model, adapter_path) + model.eval() + + # TODO: figure out a generic 
solution + if model.model_type == "llava": + processor = AutoProcessor.from_pretrained(model_path) + processor.eos_token_id = processor.tokenizer.eos_token_id + processor.encode = processor.tokenizer.encode + return model, processor + else: + tokenizer = load_tokenizer(model_path, tokenizer_config) + return model, tokenizer + + +async def get_image_from_str(_image_str: str): + image_str = _image_str.strip() + + if image_str.startswith("http"): + async with aiohttp.ClientSession() as session: + async with session.get(image_str, timeout=10) as response: + content = await response.read() + return Image.open(BytesIO(content)).convert("RGB") + elif image_str.startswith("data:image/"): + # Extract the image format and base64 data + format_prefix, base64_data = image_str.split(";base64,") + image_format = format_prefix.split("/")[1].lower() + if DEBUG >= 2: print(f"{image_str=} {image_format=}") + imgdata = base64.b64decode(base64_data) + img = Image.open(BytesIO(imgdata)) + + # Convert to RGB if not already + if img.mode != "RGB": + img = img.convert("RGB") + + return img + else: + raise ValueError("Invalid image_str format. Must be a URL or a base64 encoded image.") diff --git a/build/lib/exo/inference/mlx/test_sharded_llama.py b/build/lib/exo/inference/mlx/test_sharded_llama.py new file mode 100644 index 000000000..1c48b936c --- /dev/null +++ b/build/lib/exo/inference/mlx/test_sharded_llama.py @@ -0,0 +1,40 @@ +import mlx.core as mx +from exo.inference.mlx.sharded_model import StatefulShardedModel +from exo.inference.mlx.sharded_utils import load_shard +from exo.inference.shard import Shard + +# 79, 80 for Llama-3-70B +shard_full = Shard("llama", 0, 31, 32) +shard1 = Shard("llama", 0, 12, 32) +shard2 = Shard("llama", 13, 31, 32) + +full_model_shard, full_tokenizer = load_shard("mlx-community/Meta-Llama-3-8B-Instruct-4bit", shard=shard_full) +model_shard1, tokenizer1 = load_shard("mlx-community/Meta-Llama-3-8B-Instruct-4bit", shard=shard1) +model_shard2, tokenizer2 = load_shard("mlx-community/Meta-Llama-3-8B-Instruct-4bit", shard=shard2) + +full = StatefulShardedModel(shard_full, full_model_shard) +m1 = StatefulShardedModel(shard1, model_shard1) +m2 = StatefulShardedModel(shard2, model_shard2) + +prompt = "write a beautiful haiku about a utopia where people own their AI with edge intelligence:" +prompt_tokens = mx.array(full_tokenizer.encode(prompt)) +max_tokens = 50 + +resp = prompt_tokens +full_generated_tokens = [] +for _ in range(max_tokens): + resp = full.step(resp) + full_generated_tokens.append(resp.item()) + +print("full response: ", full_tokenizer.decode(full_generated_tokens)) + +sharded_generated_tokens = [] +sharded_resp = prompt_tokens +for _ in range(max_tokens): + resp1 = m1.step(sharded_resp) + sharded_resp = m2.step(resp1) + sharded_generated_tokens.append(sharded_resp.item()) + +print("sharded response: ", tokenizer1.decode(sharded_generated_tokens)) + +assert tokenizer1.decode(full_generated_tokens) == tokenizer1.decode(sharded_generated_tokens) diff --git a/build/lib/exo/inference/mlx/test_sharded_llava.py b/build/lib/exo/inference/mlx/test_sharded_llava.py new file mode 100644 index 000000000..958a5acc8 --- /dev/null +++ b/build/lib/exo/inference/mlx/test_sharded_llava.py @@ -0,0 +1,64 @@ +import codecs +import asyncio +import requests +from PIL import Image +from io import BytesIO + +import mlx.core as mx +from mlx_lm.models.base import KVCache + +from exo.inference.mlx.sharded_model import StatefulShardedModel +from exo.inference.mlx.sharded_utils import load_shard +from 
exo.inference.shard import Shard + +shard_full = Shard("llava", 0, 31, 32) +shard1 = Shard("llava", 0, 12, 32) +shard2 = Shard("llava", 13, 31, 32) + +model_path = "llava-hf/llava-1.5-7b-hf" + +full_model_shard, full_processor = asyncio.run(load_shard(model_path, shard=shard_full)) +model_shard1, processor1 = asyncio.run(load_shard(model_path, shard=shard1)) +model_shard2, processor2 = asyncio.run(load_shard(model_path, shard=shard2)) + +full = StatefulShardedModel(shard_full, full_model_shard) +m1 = StatefulShardedModel(shard1, model_shard1) +m2 = StatefulShardedModel(shard2, model_shard2) + +PROMPT = "USER: \nWhat are these?\nASSISTANT:" +IMAGE_FILE = "http://images.cocodataset.org/val2017/000000039769.jpg" +response = requests.get(IMAGE_FILE) +img = Image.open(BytesIO(response.content)) +prompt = codecs.decode(PROMPT, "unicode_escape") +inputs = full_processor(prompt, img, return_tensors="np") +pixel_values = mx.array(inputs["pixel_values"]) +input_ids = mx.array(inputs["input_ids"]) + +print(prompt) +y = full.step("full", input_ids, pixel_values, temp=0) +full_generated_tokens = [y.item()] + +for _ in range(13): + y = full.step("full", y, temp=0) + full_generated_tokens.append(y.item()) + +full_response = full_processor.tokenizer.decode(full_generated_tokens) +print("full response:", full_response) + +inputs = processor1(prompt, img, return_tensors="np") +pixel_values = mx.array(inputs["pixel_values"]) +input_ids = mx.array(inputs["input_ids"]) + +y = m1.step("shard", input_ids, pixel_values, temp=0) +y = m2.step("shard", y, temp=0) +full_generated_tokens = [y.item()] + +for _ in range(13): + y = m1.step("shard", y, temp=0) + y = m2.step("shard", y, temp=0) + full_generated_tokens.append(y.item()) + +sharded_response = processor2.tokenizer.decode(full_generated_tokens) +print("sharded response:", sharded_response) + +assert full_response == sharded_response diff --git a/build/lib/exo/inference/mlx/test_sharded_model.py b/build/lib/exo/inference/mlx/test_sharded_model.py new file mode 100644 index 000000000..c9743d078 --- /dev/null +++ b/build/lib/exo/inference/mlx/test_sharded_model.py @@ -0,0 +1,52 @@ +from exo.inference.shard import Shard +import mlx.core as mx +import mlx.nn as nn +from typing import Optional +import numpy as np + + +class DummyModel(nn.Module): + def __init__(self, shard: Optional[Shard] = None): + self.shard = shard + self.layers = [ + nn.Linear(8, 128), + nn.Linear(128, 128), + nn.Linear(128, 128), + nn.Linear(128, 128), + nn.Linear(128, 8), + ] + + self.n_kv_heads = 4 + self.head_dim = 4 + + def __call__(self, x, cache=None): + if self.shard: + for layer in self.layers[self.shard.start_layer:self.shard.end_layer + 1]: + x = layer(x) + if self.shard.is_last_layer(): + x = x.reshape((1, 2, 4)) + else: + for layer in self.layers: + x = layer(x) + x = x.reshape((1, 2, 4)) + + return x + + +model = DummyModel() +model.save_weights("./test_weights.npz") +n_layers = 5 +shard1 = Shard("test", 0, n_layers // 2, n_layers) +sharded_model1 = DummyModel(shard1) +shard2 = Shard("test", n_layers//2 + 1, n_layers - 1, n_layers) +sharded_model2 = DummyModel(shard2) + +model.load_weights("./test_weights.npz") +sharded_model1.load_weights("./test_weights.npz") +sharded_model2.load_weights("./test_weights.npz") + +fullresp = model(mx.array([1, 2, 3, 4, 5, 6, 7, 8])) +resp1 = sharded_model1(mx.array([1, 2, 3, 4, 5, 6, 7, 8])) +resp2 = sharded_model2(resp1) + +assert np.all(np.array(fullresp) == np.array(resp2)) diff --git a/build/lib/exo/inference/pytorch/__init__.py 
b/build/lib/exo/inference/pytorch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/build/lib/exo/inference/pytorch/helpers.py b/build/lib/exo/inference/pytorch/helpers.py new file mode 100644 index 000000000..addea2db7 --- /dev/null +++ b/build/lib/exo/inference/pytorch/helpers.py @@ -0,0 +1,24 @@ +# Helper functions for pytorch inference +# Some code coming from tinygrad but written towards pytorch + +import asyncio +import aiohttp +from tqdm import tqdm +from pathlib import Path +from typing import List + +async def fetch_file_async(session, url: str, output_path: Path): + async with session.get(url) as response: + response.raise_for_status() + with open(output_path, 'wb') as f: + async for chunk in response.content.iter_chunked(8192): + f.write(chunk) + +async def download_files(urls: List[str], output_paths: List[Path]): + async with aiohttp.ClientSession() as session: + tasks = [] + for url, output_path in zip(urls, output_paths): + tasks.append(fetch_file_async(session, url, output_path)) + + for f in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Downloading files"): + await f diff --git a/build/lib/exo/inference/pytorch/inference.py b/build/lib/exo/inference/pytorch/inference.py new file mode 100644 index 000000000..ba834eb67 --- /dev/null +++ b/build/lib/exo/inference/pytorch/inference.py @@ -0,0 +1,211 @@ +# experimental, based off of tinygrad/inference.py +import numpy as np +import torch +import numpy as np +import json +from typing import Optional, Tuple +from exo.inference.shard import Shard +from exo.inference.inference_engine import InferenceEngine +from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel +from exo.api.chatgpt_api import resolve_tokenizer +from exo.helpers import DEBUG +from transformers import DynamicCache +from accelerate import disk_offload + +class PyTorchDynamicShardInferenceEngine(InferenceEngine): + """ + PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. + """ + + def __init__(self, shard): + """ + Initialize the inference engine. + + Args: + debug (bool): If True, enables debug logging. Defaults to False. 
+ """ + self.shard = shard + self.model = None + self.tokenizer = None + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + async def infer_prompt( + self, + request_id: str, + shard: Optional[Shard] = None, + prompt: str = "", + image_str: Optional[str] = None, + inference_state: Optional[str] = None + ) -> Tuple[np.ndarray, str, bool]: + + await self.ensure_shard(shard) + + # need to make this so inference_state is not a string + # cant use it with dynamic cache + + tokens = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) + tokens = self.model.embed_tokens(tokens) + current_kvs = None + + if DEBUG >= 4: + print("infer_prompt called") + print(f"tokens: {tokens}\n") + print(f"layer_count: {self.shard.get_layer_count()}") + print(f"is_first_layer: {self.shard.is_first_layer()}") + print(f"is_last_layer: {self.shard.is_last_layer()}") + + # convert inference_state or cache from json to DynamicCache + past_kv = DynamicCache() + if inference_state != None: + cache_dict = json.loads(inference_state) + past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] + past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] + + output_data, current_kvs = self.model.forward( + tokens, + past_kv + ) + + is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] + + if DEBUG >= 4: + print(f"output_data: {output_data}\n") + print(f"output_data.size {output_data.size}\n") + + print(f"finished: {is_finished}") + print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") + print(f"output_data[-1] {output_data[-1]}") + + if output_data.size == 1: + print(f"size 1 output_data.item() {output_data.item()}") + print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") + + cache_dict = { + 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], + 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] + } + + return ( + output_data, + json.dumps(cache_dict), + is_finished + ) + + async def infer_tensor( + self, + request_id: str, + shard: Shard, + input_data: np.ndarray, + inference_state: Optional[str] = None + ) -> Tuple[np.ndarray, str, bool]: + + await self.ensure_shard(shard) + + current_kvs = None + + if input_data.size == 1: + in_tensor = torch.from_numpy( + input_data, + ).unsqueeze(0).long().to(self.device) + else: + in_tensor = torch.from_numpy( + input_data + ).long().to(self.device) + + in_tensor = self.model.embed_tokens(in_tensor) + + if DEBUG >= 4: + print("infer_tensor called") + print(f"input_data: {input_data}") + print(f"input_data.size: {input_data.size}") + print(f"input_tensor: {in_tensor}\n") + print(f"shard: {self.shard}") + print(f"layer_count: {self.shard.get_layer_count()}") + print(f"is_first_layer: {self.shard.is_first_layer()}") + print(f"is_last_layer: {self.shard.is_last_layer()}") + + # convert inference_state or cache from json to DynamicCache + past_kv = DynamicCache() + if inference_state != None: + try: + cache_dict = json.loads(inference_state) + past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] + past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] + + if DEBUG >= 4: + print("Loaded past_kv from JSON") + print(f"past_kv: {past_kv}") + print(f"past_kv.key_cache len: {len(past_kv.key_cache)}") + print(f"past_kv.value_cache len: 
{len(past_kv.value_cache)}") + except json.JSONDecodeError: + print(f"ERROR DECODING INFERENCE STATE") + + output_data, current_kvs = self.model.forward( + in_tensor, + past_kv + ) + + is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] + + if DEBUG >= 4: + print(f"in_tensor: {in_tensor}\n") + print(f"output_data: {output_data}\n") + print(f"output_data.size {output_data.size}\n") + print(f"finished: {is_finished}") + print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") + print(f"output_data[-1] {output_data[-1]}") + + if output_data.size == 1: + print(f"size 1 output_data.item() {output_data.item()}") + print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") + + + cache_dict = { + 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], + 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] + } + + return ( + output_data, + json.dumps(cache_dict), + is_finished + ) + + async def ensure_shard(self, shard: Optional[Shard]): + """ + Ensure the model shard is loaded and ready for inference. + + Args: + shard (Optional[Shard]): Shard information for the model. + """ + # if self.shard == shard: + # return + + if DEBUG >= 4: + print(f"Loading new shard: {shard}") + + if self.model: + if DEBUG >= 2: + print(f"\nCLEARING MODEL {shard.model_id}\n") + print(f"before allocated: {torch.cuda.memory_allocated()}") + print(f"before reserved: {torch.cuda.memory_reserved()}") + + # delete model and free up memory to reload + # self.model.cuda() + # disk_offload(model=self.model, offload_dir="./.offload") + import gc + + del self.model + gc.collect() + torch.cuda.empty_cache() + + if DEBUG >= 2: + print(f"after allocated: {torch.cuda.memory_allocated()}") + print(f"after reserved: {torch.cuda.memory_reserved()}") + + self.shard = shard + self.tokenizer = await resolve_tokenizer(shard.model_id) + self.model = ShardedHuggingFaceModel(shard, self.tokenizer) + + if DEBUG >= 4: + print(f"Shard loaded successfully: {shard}") \ No newline at end of file diff --git a/build/lib/exo/inference/pytorch/model/__init__.py b/build/lib/exo/inference/pytorch/model/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/build/lib/exo/inference/pytorch/model/hf.py b/build/lib/exo/inference/pytorch/model/hf.py new file mode 100644 index 000000000..aa2873c56 --- /dev/null +++ b/build/lib/exo/inference/pytorch/model/hf.py @@ -0,0 +1,155 @@ +import torch +import numpy as np +from transformers import AutoModelForCausalLM, DynamicCache, Cache +from exo.inference.shard import Shard +from exo.helpers import DEBUG +from typing import Tuple, Optional, Union, List +from exo.inference.pytorch.model.utils import sample_logits + +TOP_P = 0.75 #0.95 +TOP_K = 20 +TEMP = 0.8 + +class ShardedHuggingFaceModel(torch.nn.Module): + def __init__(self, shard: Shard, tokenizer: any): + super(ShardedHuggingFaceModel, self).__init__() + + if torch.cuda.is_available(): + self.device = torch.device("cuda") + else: + self.device = torch.device("cpu") + + self.shard = shard + self.tokenizer = tokenizer + + # Load the model + try: + self.llm_model = AutoModelForCausalLM.from_pretrained( + shard.model_id, + torch_dtype=torch.float32, + device_map="auto", + # offload_buffers=True + ) + + # disk_offload(model=self.llm_model, offload_dir="./.offload") + + self.base_model = self.llm_model.model + except Exception as err: + print(f"Error loading model: {err}") + raise + + if DEBUG >= 2: + 
print(f"\nShardedHuggingFaceModel init with shard {shard}") + print(f"self.llm_model: {self.llm_model}") + print(f"self.base_model: {self.base_model}") + + if DEBUG >= 2: + print(f"full_model.model layer: {len(self.base_model.layers)}") + + # Embeddings and final layer norm + # used for doing what forward LlamaModel does in transformers + self.norm = self.base_model.norm + self.lm_head = self.llm_model.lm_head + self.embed_tokens = self.base_model.embed_tokens + + def forward( + self, + input_ids: torch.tensor, + past_kvs: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + ) -> Tuple[np.ndarray, any]: + """ + Forward through layers using the base model + + Args: + input_ids: tensor input + past_kvs: past key value stores for cache + use_cache: use cache + + Returns: + hidden_states: numpy of states between layers + or logits: numpy of normalization and linearization of last hidden state + past_kvs: DynamicCache of past key values if use_cache is true + + Ref: + https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/qwen2/modeling_qwen2.py#L804 + https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/llama/modeling_llama.py#L887 + """ + if DEBUG >= 4: + print("forward called") + print(f"input_ids: {input_ids}\n") + print(f"layer_count: {self.shard.get_layer_count()}") + print(f"is_first_layer: {self.shard.is_first_layer()}") + print(f"is_last_layer: {self.shard.is_last_layer()}") + + past_kvs = DynamicCache.from_legacy_cache(past_kvs) + past_seen_tokens = past_kvs.get_seq_length() if past_kvs is not None else 0 + + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + input_ids.shape[1], + device=input_ids.device + ).to(self.device) + + position_ids = cache_position.unsqueeze(0).to(self.device) + + try: + position_embeddings = self.base_model.rotary_emb( + input_ids, + position_ids + ) + except Exception as err: + print(f"rotary_emb not found in base_model") + position_embeddings = None + + # progress through layers + for i in range(self.shard.start_layer, self.shard.end_layer + 1): + decoder_layer = self.base_model.layers[i] + + if DEBUG >= 4: + print("Going through layer") + print(f"{decoder_layer}") + print("input_ids") + print(f"{input_ids}") + + layer_outputs = decoder_layer( + input_ids, + position_ids=position_ids if not position_embeddings else None, + position_embeddings=position_embeddings, + past_key_value=past_kvs, + use_cache=True, + cache_position=cache_position, + ) + + hidden_states = layer_outputs[0] + next_kvs = layer_outputs[1] + + if DEBUG >= 3: + print(f"layer_outputs {layer_outputs}") + + if self.shard.is_last_layer(): + hs_norm = self.norm(hidden_states) + hs_lm_head = self.llm_model.lm_head(hs_norm).float() + + # Use the sampling function with default settings + with torch.no_grad(): + output_token = sample_logits( + hs_lm_head[:, -1, :], + TEMP, + TOP_P, + TOP_K + ).numpy(force=True).flatten() + + if DEBUG >= 2: + print(f"hs_norm: {hs_norm}") + print(f"hs_lm_head: {hs_lm_head}") + print(f"output_token: {output_token}") + + return (output_token, next_kvs) + + with torch.no_grad(): + out_hidden_states = hidden_states.numpy(force=True) + + return ( + out_hidden_states, + next_kvs + ) \ No newline at end of file diff --git a/build/lib/exo/inference/pytorch/model/utils.py b/build/lib/exo/inference/pytorch/model/utils.py new file mode 100644 index 000000000..df84b3977 --- /dev/null +++ b/build/lib/exo/inference/pytorch/model/utils.py @@ -0,0 +1,83 @@ +import torch +from torch.nn import functional 
as F + +def top_p_sampling(scaled_logits: torch.Tensor, top_p: float) -> torch.Tensor: + """ + Apply top-p (nucleus) sampling to logits. + + Args: + scaled_logits (torch.Tensor): The scaled logits from the model's output. + top_p (float): The cumulative probability threshold for top-p filtering. + temp (float): Temperature parameter for softmax distribution reshaping. + + Returns: + torch.Tensor: Token selected based on the top-p criterion. + + Ref: + https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/sample_utils.py#L67C1-L97C17 + """ + scaled_logits = torch.where(torch.isnan(scaled_logits), torch.zeros_like(scaled_logits), scaled_logits) + scaled_logits = torch.where(torch.isinf(scaled_logits), torch.full_like(scaled_logits, 1e6), scaled_logits) + + probs = torch.softmax(scaled_logits, dim=-1) + + sorted_probs, sorted_indices = torch.sort( + probs, + descending=True, + dim=-1 + ) + + cumulative_probs = torch.cumsum(sorted_probs, dim=-1) + mask = cumulative_probs > top_p + + top_probs = torch.where(mask, torch.zeros_like(sorted_probs), sorted_probs) + sum_probs = top_probs.sum(dim=-1, keepdim=True) + top_probs = torch.where(sum_probs > 0, top_probs / sum_probs, torch.ones_like(top_probs) / top_probs.size(-1)) + + if torch.isnan(top_probs).any() or torch.isinf(top_probs).any(): + print("Warning: Top probabilities contain NaN or Inf values after normalization") + top_probs = torch.where(torch.isnan(top_probs) | torch.isinf(top_probs), + 1.0 / top_probs.size(-1), + top_probs) + + sorted_token = torch.multinomial(top_probs, num_samples=1) + + token = sorted_indices.gather(-1, sorted_token) + + return token.squeeze(-1) + +def sample_logits(logits, temp, top_p, top_k): + """ + Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. + + Args: + logits (torch.Tensor): The logits distribution to sample from. + temp (float): temp for scaling logits. + top_p (float): The cumulative probability threshold for nucleus sampling. + + Returns: + torch.Tensor: The selected token index. + """ + + # Ensure logits are float + logits = logits.float() + + # If temp is very low, just use argmax + if temp == 0: + return logits.argmax(dim=-1) + + scaled_logits = logits/temp + + # top k + if top_k > 0: + top_values, top_indices = torch.topk(scaled_logits, top_k, dim=-1) + scaled_logits = torch.zeros_like(logits).scatter_(-1, top_indices, top_values) + + # Top-p sampling + if 0 < top_p < 1.0: + return top_p_sampling(scaled_logits, top_p) + else: + # random distribution selection + probs = torch.softmax(scaled_logits, dim=-1) + rand_sample = torch.distributions.Categorical(probs) + return rand_sample.sample().squeeze() \ No newline at end of file diff --git a/build/lib/exo/inference/pytorch/test_inference_engine.py b/build/lib/exo/inference/pytorch/test_inference_engine.py new file mode 100644 index 000000000..bacf53bcc --- /dev/null +++ b/build/lib/exo/inference/pytorch/test_inference_engine.py @@ -0,0 +1,141 @@ + +import asyncio +from exo.inference.shard import Shard +from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine +from exo.download.hf.hf_shard_download import HFShardDownloader +from exo.inference.inference_engine import InferenceEngine +from exo.inference.shard import Shard +from exo.helpers import DEBUG +import os +import numpy as np + +async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str, n_layers: int): + # prompt = "Why is the sky blue?" 
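+  # The test splits the model at the midpoint layer (pp) and feeds the first
+  # shard's output tensor, together with its serialized KV-cache state, into a
+  # second engine that holds the remaining layers.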
+ prompt = "In a single word only, what is the last name of the current president of the USA?" + + # shard = Shard( + # model_id=model_id, + # start_layer=0, + # end_layer=n_layers-1, + # n_layers=n_layers + # ) + + # resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( + # "A", + # shard=shard, + # prompt=prompt + # ) + + # print(f"resp_full: {resp_full}") + + # next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( + # "A", + # shard=shard, + # input_data=resp_full, + # inference_state=inference_state_full, + # ) + + # print(f"next_resp_full: {next_resp_full}") + + pp = int(n_layers/2) + + resp_shard = Shard( + model_id=model_id, + start_layer=0, + end_layer=pp, + n_layers=n_layers + ) + + resp_shard2 = Shard( + model_id=model_id, + start_layer=pp + 1, + end_layer=n_layers-1, + n_layers=n_layers + ) + + resp1, inference_state_1, _ = await inference_engine_1.infer_prompt( + "B", + shard=resp_shard, + prompt=prompt + ) + + resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( + "B", + shard=resp_shard2, + input_data=resp1, + inference_state=inference_state_1, + ) + + # resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( + # "B", + # shard=resp_shard, + # input_data=resp2, + # inference_state=inference_state_2, + # ) + + # resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( + # "B", + # shard=resp_shard2, + # input_data=resp3, + # inference_state=inference_state_3, + # ) + + assert np.array_equal(resp_full, resp2) + assert np.array_equal(next_resp_full, resp4) + +if __name__ == '__main__': + # try: + # print(f"\n\n -------- TEST QWEN2 -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "Qwen/Qwen2-0.5B-Instruct", + # 24 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") + + # try: + # print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "andrijdavid/Llama3-1B-Base", + # 3 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") + + # try: + # print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "meta-llama/Meta-Llama-3.1-8B", + # 32 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") + + # try: + # print(f"\n\n ------- TEST Chickaboo/ChickaQ-Large -----\n\n") + # asyncio.run(test_inference_engine( + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + # "Chickaboo/ChickaQ-Large", + # 24 + # )) + # except Exception as err: + # print(f"\n\n !!!!!!!!!!! Chickaboo/ChickaQ-Large TEST FAILED \n{err}\n") + + try: + print(f"\n\n --------- TEST ambrosfitz/TinyLlama-1.1B-Chat-yawp -------\n\n") + asyncio.run(test_inference_engine( + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + PyTorchDynamicShardInferenceEngine(HFShardDownloader()), + "ambrosfitz/TinyLlama-1.1B-Chat-yawp", + 22 + )) + except Exception as err: + print(f"\n\n !!!!!!!!!!! 
ambrosfitz/TinyLlama-1.1B-Chat-yawp TEST FAILED \n{err}\n") + diff --git a/build/lib/exo/inference/shard.py b/build/lib/exo/inference/shard.py new file mode 100644 index 000000000..21b662f63 --- /dev/null +++ b/build/lib/exo/inference/shard.py @@ -0,0 +1,39 @@ +from dataclasses import dataclass, field + + +@dataclass(frozen=True) +class Shard: + model_id: str + start_layer: int + end_layer: int + n_layers: int + + def __hash__(self): + return hash((self.model_id, self.start_layer, self.end_layer, self.n_layers)) + + def is_first_layer(self) -> bool: + return self.start_layer == 0 + + def is_last_layer(self) -> bool: + return self.end_layer == self.n_layers - 1 + + def get_layer_count(self) -> int: + return self.end_layer - self.start_layer + 1 + + def to_dict(self) -> dict: + return { + "model_id": self.model_id, + "start_layer": self.start_layer, + "end_layer": self.end_layer, + "n_layers": self.n_layers, + } + + def from_dict(data: dict) -> 'Shard': + return Shard(**data) + + def overlaps(self, other: 'Shard') -> bool: + return shards_overlap(self, other) + + +def shards_overlap(shard1: Shard, shard2: Shard) -> bool: + return (shard1.model_id == shard2.model_id and max(shard1.start_layer, shard2.start_layer) <= min(shard1.end_layer, shard2.end_layer)) diff --git a/build/lib/exo/inference/test_inference_engine.py b/build/lib/exo/inference/test_inference_engine.py new file mode 100644 index 000000000..e57c608d9 --- /dev/null +++ b/build/lib/exo/inference/test_inference_engine.py @@ -0,0 +1,64 @@ +from exo.inference.mlx.sharded_inference_engine import MLXDynamicShardInferenceEngine +from exo.download.hf.hf_shard_download import HFShardDownloader +from exo.inference.inference_engine import InferenceEngine +from exo.inference.shard import Shard +from exo.helpers import DEBUG +import os +import asyncio +import numpy as np + + +# An inference engine should work the same for any number of Shards, as long as the Shards are continuous. +async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str): + prompt = "In a single word only, what is the last name of the current president of the USA?" 
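+  # Run the prompt once on a single full shard (layers 0-31), then again split
+  # across two engines at layer pp, and check that both paths produce identical
+  # outputs.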
+ resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt("A", shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), prompt=prompt) + next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( + "A", + shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), + input_data=resp_full, + inference_state=inference_state_full, + ) + + pp = 15 + resp1, inference_state_1, _ = await inference_engine_1.infer_prompt("B", shard=Shard(model_id=model_id, start_layer=0, end_layer=pp, n_layers=32), prompt=prompt) + resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( + "B", + shard=Shard(model_id=model_id, start_layer=pp + 1, end_layer=31, n_layers=32), + input_data=resp1, + inference_state=inference_state_1, + ) + resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( + "B", + shard=Shard(model_id=model_id, start_layer=0, end_layer=pp, n_layers=32), + input_data=resp2, + inference_state=inference_state_2, + ) + resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( + "B", + shard=Shard(model_id=model_id, start_layer=pp + 1, end_layer=31, n_layers=32), + input_data=resp3, + inference_state=inference_state_3, + ) + + assert np.array_equal(resp_full, resp2) + assert np.array_equal(next_resp_full, resp4) + + +asyncio.run(test_inference_engine( + MLXDynamicShardInferenceEngine(HFShardDownloader()), + MLXDynamicShardInferenceEngine(HFShardDownloader()), + "mlx-community/Meta-Llama-3-8B-Instruct-4bit", +)) + +if os.getenv("RUN_TINYGRAD", default="0") == "1": + import tinygrad + import os + from exo.inference.tinygrad.inference import TinygradDynamicShardInferenceEngine + tinygrad.helpers.DEBUG.value = int(os.getenv("TINYGRAD_DEBUG", default="0")) + asyncio.run( + test_inference_engine( + TinygradDynamicShardInferenceEngine(HFShardDownloader()), + TinygradDynamicShardInferenceEngine(HFShardDownloader()), + "TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", + ) + ) diff --git a/build/lib/exo/inference/tokenizers.py b/build/lib/exo/inference/tokenizers.py new file mode 100644 index 000000000..9accd9436 --- /dev/null +++ b/build/lib/exo/inference/tokenizers.py @@ -0,0 +1,45 @@ +import traceback +from aiofiles import os as aios +from transformers import AutoTokenizer, AutoProcessor +from exo.download.hf.hf_helpers import get_local_snapshot_dir +from exo.helpers import DEBUG + +async def resolve_tokenizer(model_id: str): + local_path = await get_local_snapshot_dir(model_id) + if DEBUG >= 2: print(f"Checking if local path exists to load tokenizer from local {local_path=}") + try: + if await aios.path.exists(local_path): + if DEBUG >= 2: print(f"Resolving tokenizer for {model_id=} from {local_path=}") + return await _resolve_tokenizer(local_path) + except: + if DEBUG >= 5: print(f"Local check for {local_path=} failed. 
Resolving tokenizer for {model_id=} normally...") + if DEBUG >= 5: traceback.print_exc() + return await _resolve_tokenizer(model_id) + +async def _resolve_tokenizer(model_id_or_local_path: str): + try: + if DEBUG >= 4: print(f"Trying AutoProcessor for {model_id_or_local_path}") + if "Mistral-Large" in str(model_id_or_local_path): + use_fast = True + else: + use_fast = False + processor = AutoProcessor.from_pretrained(model_id_or_local_path, use_fast=use_fast) + if not hasattr(processor, 'eos_token_id'): + processor.eos_token_id = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).eos_token_id + if not hasattr(processor, 'encode'): + processor.encode = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).encode + if not hasattr(processor, 'decode'): + processor.decode = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).decode + return processor + except Exception as e: + if DEBUG >= 4: print(f"Failed to load processor for {model_id_or_local_path}. Error: {e}") + if DEBUG >= 4: print(traceback.format_exc()) + + try: + if DEBUG >= 4: print(f"Trying AutoTokenizer for {model_id_or_local_path}") + return AutoTokenizer.from_pretrained(model_id_or_local_path) + except Exception as e: + if DEBUG >= 4: print(f"Failed to load tokenizer for {model_id_or_local_path}. Falling back to tinygrad tokenizer. Error: {e}") + if DEBUG >= 4: print(traceback.format_exc()) + + raise ValueError(f"[TODO] Unsupported model: {model_id_or_local_path}") diff --git a/build/lib/exo/models.py b/build/lib/exo/models.py new file mode 100644 index 000000000..137b881ce --- /dev/null +++ b/build/lib/exo/models.py @@ -0,0 +1,44 @@ +from exo.inference.shard import Shard + +model_base_shards = { + ### llama + "llama-3.1-8b": { + "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), + "TinygradDynamicShardInferenceEngine": Shard(model_id="mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", start_layer=0, end_layer=0, n_layers=32), + "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, end_layer=0, n_layers=32), + }, + "llama-3.1-70b": { + "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), + "TinygradDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3.1-70B", start_layer=0, end_layer=0, n_layers=80), + }, + "llama-3.1-405b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-405B-4bit", start_layer=0, end_layer=0, n_layers=126),}, + "llama-3-8b": { + "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), + "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", start_layer=0, end_layer=0, n_layers=32), + }, + "llama-3-70b": { + "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), + "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", start_layer=0, end_layer=0, n_layers=80), + }, + "llama-3-2B-Base": { + "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=6), + }, + "llama-3-1B-Base": { + "PyTorchDynamicShardInferenceEngine": 
Shard(model_id="andrijdavid/Llama3-1B-Base", start_layer=0, end_layer=0, n_layers=3), + }, + "TinyLlama-1.1B-Chat-yaw": { + "PyTorchDynamicShardInferenceEngine": Shard(model_id="ambrosfitz/TinyLlama-1.1B-Chat-yawp", start_layer=0, end_layer=0, n_layers=22), + }, + ### mistral + "mistral-nemo": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40),}, + "mistral-large": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Large-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=88),}, + ### deepseek v2 + "deepseek-coder-v2-lite": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", start_layer=0, end_layer=0, n_layers=27),}, + ### llava + "llava-1.5-7b-hf": {"MLXDynamicShardInferenceEngine": Shard(model_id="llava-hf/llava-1.5-7b-hf", start_layer=0, end_layer=0, n_layers=32),}, + ### qwen + "Qwen2-0.5B-Instruct": { + "PyTorchDynamicShardInferenceEngine": Shard(model_id="Qwen/Qwen2-0.5B-Instruct", start_layer=0, end_layer=0, n_layers=24), + }, + +} diff --git a/build/lib/exo/networking/__init__.py b/build/lib/exo/networking/__init__.py new file mode 100644 index 000000000..44a10a30e --- /dev/null +++ b/build/lib/exo/networking/__init__.py @@ -0,0 +1,5 @@ +from .discovery import Discovery +from .peer_handle import PeerHandle +from .server import Server + +__all__ = ["Discovery", "PeerHandle", "Server"] diff --git a/build/lib/exo/networking/discovery.py b/build/lib/exo/networking/discovery.py new file mode 100644 index 000000000..cdcbfabc1 --- /dev/null +++ b/build/lib/exo/networking/discovery.py @@ -0,0 +1,17 @@ +from abc import ABC, abstractmethod +from typing import List +from .peer_handle import PeerHandle + + +class Discovery(ABC): + @abstractmethod + async def start(self) -> None: + pass + + @abstractmethod + async def stop(self) -> None: + pass + + @abstractmethod + async def discover_peers(self, wait_for_peers: int = 0) -> List[PeerHandle]: + pass diff --git a/build/lib/exo/networking/grpc/__init__.py b/build/lib/exo/networking/grpc/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/build/lib/exo/networking/grpc/grpc_discovery.py b/build/lib/exo/networking/grpc/grpc_discovery.py new file mode 100644 index 000000000..eb08a8385 --- /dev/null +++ b/build/lib/exo/networking/grpc/grpc_discovery.py @@ -0,0 +1,188 @@ +import asyncio +import json +import socket +import time +from typing import List, Dict, Callable, Tuple, Coroutine +from ..discovery import Discovery +from ..peer_handle import PeerHandle +from .grpc_peer_handle import GRPCPeerHandle +from exo.topology.device_capabilities import DeviceCapabilities, device_capabilities, UNKNOWN_DEVICE_CAPABILITIES +from exo import DEBUG_DISCOVERY + + +class ListenProtocol(asyncio.DatagramProtocol): + def __init__(self, on_message: Callable[[bytes, Tuple[str, int]], Coroutine]): + super().__init__() + self.on_message = on_message + self.loop = asyncio.get_event_loop() + + def connection_made(self, transport): + self.transport = transport + + def datagram_received(self, data, addr): + asyncio.create_task(self.on_message(data, addr)) + + +class GRPCDiscovery(Discovery): + def __init__( + self, + node_id: str, + node_port: int, + listen_port: int, + broadcast_port: int = None, + broadcast_interval: int = 1, + device_capabilities: DeviceCapabilities = UNKNOWN_DEVICE_CAPABILITIES, + discovery_timeout: int = 30, + ): + self.node_id = node_id + self.node_port = 
node_port + self.device_capabilities = device_capabilities + self.listen_port = listen_port + self.broadcast_port = broadcast_port if broadcast_port is not None else listen_port + self.broadcast_interval = broadcast_interval + self.known_peers: Dict[str, Tuple[GRPCPeerHandle, float, float]] = {} + self.broadcast_task = None + self.listen_task = None + self.cleanup_task = None + self.discovery_timeout = discovery_timeout + + async def start(self): + self.device_capabilities = device_capabilities() + self.broadcast_task = asyncio.create_task(self.task_broadcast_presence()) + self.listen_task = asyncio.create_task(self.task_listen_for_peers()) + self.cleanup_task = asyncio.create_task(self.task_cleanup_peers()) + + async def stop(self): + if self.broadcast_task: + self.broadcast_task.cancel() + if self.listen_task: + self.listen_task.cancel() + if self.cleanup_task: + self.cleanup_task.cancel() + if self.broadcast_task or self.listen_task or self.cleanup_task: + await asyncio.gather(self.broadcast_task, self.listen_task, self.cleanup_task, return_exceptions=True) + + async def discover_peers(self, wait_for_peers: int = 0) -> List[PeerHandle]: + if DEBUG_DISCOVERY >= 2: + print("Starting peer discovery process...") + + if wait_for_peers > 0: + while len(self.known_peers) == 0: + if DEBUG_DISCOVERY >= 2: + print("No peers discovered yet, retrying in 1 second...") + await asyncio.sleep(1) # Keep trying to find peers + if DEBUG_DISCOVERY >= 2: + print(f"Discovered first peer: {next(iter(self.known_peers.values()))}") + + grace_period = 5 # seconds + while True: + initial_peer_count = len(self.known_peers) + if DEBUG_DISCOVERY >= 2: + print(f"Current number of known peers: {initial_peer_count}. Waiting {grace_period} seconds to discover more...") + if len(self.known_peers) == initial_peer_count: + if wait_for_peers > 0: + await asyncio.sleep(grace_period) + if DEBUG_DISCOVERY >= 2: + print(f"Waiting additional {wait_for_peers} seconds for more peers.") + wait_for_peers = 0 + else: + if DEBUG_DISCOVERY >= 2: + print("No new peers discovered in the last grace period. 
Ending discovery process.") + break # No new peers found in the grace period, we are done + + return [peer_handle for peer_handle, _, _ in self.known_peers.values()] + + async def task_broadcast_presence(self): + transport, _ = await asyncio.get_event_loop().create_datagram_endpoint(lambda: asyncio.DatagramProtocol(), local_addr=("0.0.0.0", 0), family=socket.AF_INET) + sock = transport.get_extra_info("socket") + sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1) + + message = json.dumps({ + "type": "discovery", + "node_id": self.node_id, + "grpc_port": self.node_port, + "device_capabilities": self.device_capabilities.to_dict(), + }).encode("utf-8") + + while True: + try: + if DEBUG_DISCOVERY >= 3: + print(f"Broadcast presence: {message}") + transport.sendto(message, ("", self.broadcast_port)) + await asyncio.sleep(self.broadcast_interval) + except Exception as e: + print(f"Error in broadcast presence: {e}") + import traceback + + print(traceback.format_exc()) + + async def on_listen_message(self, data, addr): + if not data: + return + + decoded_data = data.decode("utf-8", errors="ignore") + + # Check if the decoded data starts with a valid JSON character + if not (decoded_data.strip() and decoded_data.strip()[0] in "{["): + if DEBUG_DISCOVERY >= 2: + print(f"Received invalid JSON data from {addr}: {decoded_data[:100]}") + return + + try: + decoder = json.JSONDecoder(strict=False) + message = decoder.decode(decoded_data) + except json.JSONDecodeError as e: + if DEBUG_DISCOVERY >= 2: + print(f"Error decoding JSON data from {addr}: {e}") + return + + if DEBUG_DISCOVERY >= 2: + print(f"received from peer {addr}: {message}") + + if message["type"] == "discovery" and message["node_id"] != self.node_id: + peer_id = message["node_id"] + peer_host = addr[0] + peer_port = message["grpc_port"] + device_capabilities = DeviceCapabilities(**message["device_capabilities"]) + if peer_id not in self.known_peers: + self.known_peers[peer_id] = ( + GRPCPeerHandle(peer_id, f"{peer_host}:{peer_port}", device_capabilities), + time.time(), + time.time(), + ) + if DEBUG_DISCOVERY >= 2: + print(f"Discovered new peer {peer_id} at {peer_host}:{peer_port}") + self.known_peers[peer_id] = (self.known_peers[peer_id][0], self.known_peers[peer_id][1], time.time()) + + async def task_listen_for_peers(self): + await asyncio.get_event_loop().create_datagram_endpoint(lambda: ListenProtocol(self.on_listen_message), local_addr=("0.0.0.0", self.listen_port)) + if DEBUG_DISCOVERY >= 2: + print("Started listen task") + + async def task_cleanup_peers(self): + while True: + try: + current_time = time.time() + peers_to_remove = [ + peer_handle.id() for peer_handle, connected_at, last_seen in self.known_peers.values() + if (not await peer_handle.is_connected() and current_time - connected_at > self.discovery_timeout) or current_time - last_seen > self.discovery_timeout + ] + if DEBUG_DISCOVERY >= 2: + print( + "Peer statuses:", + {peer_handle.id(): f"is_connected={await peer_handle.is_connected()}, {connected_at=}, {last_seen=}" + for peer_handle, connected_at, last_seen in self.known_peers.values()}, + ) + if DEBUG_DISCOVERY >= 2 and len(peers_to_remove) > 0: + print(f"Cleaning up peers: {peers_to_remove}") + for peer_id in peers_to_remove: + if peer_id in self.known_peers: + del self.known_peers[peer_id] + if DEBUG_DISCOVERY >= 2: + print(f"Removed peer {peer_id} due to inactivity.") + await asyncio.sleep(self.broadcast_interval) + except Exception as e: + print(f"Error in cleanup peers: {e}") + import traceback + + 
print(traceback.format_exc()) diff --git a/build/lib/exo/networking/grpc/grpc_peer_handle.py b/build/lib/exo/networking/grpc/grpc_peer_handle.py new file mode 100644 index 000000000..0629dc777 --- /dev/null +++ b/build/lib/exo/networking/grpc/grpc_peer_handle.py @@ -0,0 +1,109 @@ +import grpc +import numpy as np +from typing import Optional, Tuple, List + +# These would be generated from the .proto file +from . import node_service_pb2 +from . import node_service_pb2_grpc + +from ..peer_handle import PeerHandle +from exo.inference.shard import Shard +from exo.topology.topology import Topology +from exo.topology.device_capabilities import DeviceCapabilities + + +class GRPCPeerHandle(PeerHandle): + def __init__(self, _id: str, address: str, device_capabilities: DeviceCapabilities): + self._id = _id + self.address = address + self._device_capabilities = device_capabilities + self.channel = None + self.stub = None + + def id(self) -> str: + return self._id + + def device_capabilities(self) -> DeviceCapabilities: + return self._device_capabilities + + async def connect(self): + self.channel = grpc.aio.insecure_channel(self.address, options=[("grpc.max_metadata_size", 32*1024*1024)]) + self.stub = node_service_pb2_grpc.NodeServiceStub(self.channel) + + async def is_connected(self) -> bool: + return self.channel is not None and self.channel.get_state() == grpc.ChannelConnectivity.READY + + async def disconnect(self): + if self.channel: + await self.channel.close() + self.channel = None + self.stub = None + + async def send_prompt(self, shard: Shard, prompt: str, image_str: Optional[str] = None, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.array]: + request = node_service_pb2.PromptRequest( + prompt=prompt, + image_str=image_str, + shard=node_service_pb2.Shard( + model_id=shard.model_id, + start_layer=shard.start_layer, + end_layer=shard.end_layer, + n_layers=shard.n_layers, + ), + request_id=request_id, + inference_state=inference_state, + ) + response = await self.stub.SendPrompt(request) + + if not response.tensor_data or not response.shape or not response.dtype: + return None + + return np.frombuffer(response.tensor_data, dtype=np.dtype(response.dtype)).reshape(response.shape) + + async def send_tensor(self, shard: Shard, tensor: np.ndarray, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.array]: + request = node_service_pb2.TensorRequest( + shard=node_service_pb2.Shard( + model_id=shard.model_id, + start_layer=shard.start_layer, + end_layer=shard.end_layer, + n_layers=shard.n_layers, + ), + tensor=node_service_pb2.Tensor(tensor_data=tensor.tobytes(), shape=tensor.shape, dtype=str(tensor.dtype)), + request_id=request_id, + inference_state=inference_state, + ) + response = await self.stub.SendTensor(request) + + if not response.tensor_data or not response.shape or not response.dtype: + return None + + return np.frombuffer(response.tensor_data, dtype=np.dtype(response.dtype)).reshape(response.shape) + + async def get_inference_result(self, request_id: str) -> Tuple[Optional[np.ndarray], bool]: + request = node_service_pb2.GetInferenceResultRequest(request_id=request_id) + response = await self.stub.GetInferenceResult(request) + if response.tensor is None: + return None, response.is_finished + return ( + np.frombuffer(response.tensor.tensor_data, dtype=np.dtype(response.tensor.dtype)).reshape(response.tensor.shape), + response.is_finished, + ) + + async def collect_topology(self, visited: set[str], max_depth: int) 
-> Topology: + request = node_service_pb2.CollectTopologyRequest(visited=visited, max_depth=max_depth) + response = await self.stub.CollectTopology(request) + topology = Topology() + for node_id, capabilities in response.nodes.items(): + device_capabilities = DeviceCapabilities(model=capabilities.model, chip=capabilities.chip, memory=capabilities.memory, flops=capabilities.flops) + topology.update_node(node_id, device_capabilities) + for node_id, peers in response.peer_graph.items(): + for peer_id in peers.peer_ids: + topology.add_edge(node_id, peer_id) + return topology + + async def send_result(self, request_id: str, result: List[int], is_finished: bool) -> None: + request = node_service_pb2.SendResultRequest(request_id=request_id, result=result, is_finished=is_finished) + await self.stub.SendResult(request) + + async def send_opaque_status(self, request_id: str, status: str) -> None: + request = node_service_pb2.SendOpaqueStatusRequest(request_id=request_id, status=status) + await self.stub.SendOpaqueStatus(request) diff --git a/build/lib/exo/networking/grpc/grpc_server.py b/build/lib/exo/networking/grpc/grpc_server.py new file mode 100644 index 000000000..1481ef512 --- /dev/null +++ b/build/lib/exo/networking/grpc/grpc_server.py @@ -0,0 +1,118 @@ +import grpc +from concurrent import futures +import numpy as np +from asyncio import CancelledError + +from . import node_service_pb2 +from . import node_service_pb2_grpc +from exo import DEBUG +from exo.inference.shard import Shard +from exo.orchestration import Node + + +class GRPCServer(node_service_pb2_grpc.NodeServiceServicer): + def __init__(self, node: Node, host: str, port: int): + self.node = node + self.host = host + self.port = port + self.server = None + + async def start(self) -> None: + self.server = grpc.aio.server( + futures.ThreadPoolExecutor(max_workers=10), + options=[ + ("grpc.max_metadata_size", 32*1024*1024), + ("grpc.max_send_message_length", 128*1024*1024), + ("grpc.max_receive_message_length", 128*1024*1024), + ], + ) + node_service_pb2_grpc.add_NodeServiceServicer_to_server(self, self.server) + listen_addr = f"{self.host}:{self.port}" + self.server.add_insecure_port(listen_addr) + await self.server.start() + if DEBUG >= 1: print(f"Server started, listening on {listen_addr}") + + async def stop(self) -> None: + if self.server: + try: + await self.server.stop(grace=5) + await self.server.wait_for_termination() + except CancelledError: + pass + if DEBUG >= 1: print("Server stopped and all connections are closed") + + async def SendPrompt(self, request, context): + shard = Shard( + model_id=request.shard.model_id, + start_layer=request.shard.start_layer, + end_layer=request.shard.end_layer, + n_layers=request.shard.n_layers, + ) + prompt = request.prompt + image_str = request.image_str + request_id = request.request_id + result = await self.node.process_prompt(shard, prompt, image_str, request_id) + if DEBUG >= 5: print(f"SendPrompt {shard=} {prompt=} {image_str=} {request_id=} result: {result}") + tensor_data = result.tobytes() if result is not None else None + return node_service_pb2.Tensor(tensor_data=tensor_data, shape=result.shape, dtype=str(result.dtype)) if result is not None else node_service_pb2.Tensor() + + async def SendTensor(self, request, context): + shard = Shard( + model_id=request.shard.model_id, + start_layer=request.shard.start_layer, + end_layer=request.shard.end_layer, + n_layers=request.shard.n_layers, + ) + tensor = np.frombuffer(request.tensor.tensor_data, 
dtype=np.dtype(request.tensor.dtype)).reshape(request.tensor.shape) + request_id = request.request_id + inference_state = request.inference_state + + result = await self.node.process_tensor(shard, tensor, request_id, inference_state) + if DEBUG >= 5: print(f"SendTensor tensor {shard=} {tensor=} {request_id=} result: {result}") + tensor_data = result.tobytes() if result is not None else None + return node_service_pb2.Tensor(tensor_data=tensor_data, shape=result.shape, dtype=str(result.dtype)) if result is not None else node_service_pb2.Tensor() + + async def GetInferenceResult(self, request, context): + request_id = request.request_id + result = await self.node.get_inference_result(request_id) + if DEBUG >= 5: print(f"GetInferenceResult {request_id=}: {result}") + tensor_data = result[0].tobytes() if result[0] is not None else None + return ( + node_service_pb2.InferenceResult( + tensor=node_service_pb2.Tensor(tensor_data=tensor_data, shape=result[0].shape, dtype=str(result[0].dtype)), + is_finished=result[1], + ) if result[0] is not None else node_service_pb2.InferenceResult(is_finished=result[1]) + ) + + async def CollectTopology(self, request, context): + max_depth = request.max_depth + visited = set(request.visited) + topology = await self.node.collect_topology(visited, max_depth) + nodes = { + node_id: + node_service_pb2.DeviceCapabilities( + model=cap.model, + chip=cap.chip, + memory=cap.memory, + flops=node_service_pb2.DeviceFlops(fp32=cap.flops.fp32, fp16=cap.flops.fp16, int8=cap.flops.int8), + ) + for node_id, cap in topology.nodes.items() + } + peer_graph = {node_id: node_service_pb2.Peers(peer_ids=peers) for node_id, peers in topology.peer_graph.items()} + if DEBUG >= 5: print(f"CollectTopology {max_depth=} {visited=} {nodes=} {peer_graph=}") + return node_service_pb2.Topology(nodes=nodes, peer_graph=peer_graph) + + async def SendResult(self, request, context): + request_id = request.request_id + result = request.result + is_finished = request.is_finished + if DEBUG >= 5: print(f"Received SendResult request: {request_id=} {result=} {is_finished=}") + self.node.on_token.trigger_all(request_id, result, is_finished) + return node_service_pb2.Empty() + + async def SendOpaqueStatus(self, request, context): + request_id = request.request_id + status = request.status + if DEBUG >= 5: print(f"Received SendOpaqueStatus request: {request_id=} {status=}") + self.node.on_opaque_status.trigger_all(request_id, status) + return node_service_pb2.Empty() diff --git a/build/lib/exo/networking/grpc/node_service_pb2.py b/build/lib/exo/networking/grpc/node_service_pb2.py new file mode 100644 index 000000000..cae2d0809 --- /dev/null +++ b/build/lib/exo/networking/grpc/node_service_pb2.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: node_service.proto +# Protobuf Python Version: 5.26.1 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x12node_service.proto\x12\x0cnode_service\"S\n\x05Shard\x12\x10\n\x08model_id\x18\x01 \x01(\t\x12\x13\n\x0bstart_layer\x18\x02 \x01(\x05\x12\x11\n\tend_layer\x18\x03 \x01(\x05\x12\x10\n\x08n_layers\x18\x04 \x01(\x05\"\xc3\x01\n\rPromptRequest\x12\"\n\x05shard\x18\x01 \x01(\x0b\x32\x13.node_service.Shard\x12\x0e\n\x06prompt\x18\x02 \x01(\t\x12\x16\n\timage_str\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x17\n\nrequest_id\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x1c\n\x0finference_state\x18\x05 \x01(\tH\x02\x88\x01\x01\x42\x0c\n\n_image_strB\r\n\x0b_request_idB\x12\n\x10_inference_state\"\xb3\x01\n\rTensorRequest\x12\"\n\x05shard\x18\x01 \x01(\x0b\x32\x13.node_service.Shard\x12$\n\x06tensor\x18\x02 \x01(\x0b\x32\x14.node_service.Tensor\x12\x17\n\nrequest_id\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x1c\n\x0finference_state\x18\x04 \x01(\tH\x01\x88\x01\x01\x42\r\n\x0b_request_idB\x12\n\x10_inference_state\"/\n\x19GetInferenceResultRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\"\\\n\x0fInferenceResult\x12)\n\x06tensor\x18\x01 \x01(\x0b\x32\x14.node_service.TensorH\x00\x88\x01\x01\x12\x13\n\x0bis_finished\x18\x02 \x01(\x08\x42\t\n\x07_tensor\";\n\x06Tensor\x12\x13\n\x0btensor_data\x18\x01 \x01(\x0c\x12\r\n\x05shape\x18\x02 \x03(\x05\x12\r\n\x05\x64type\x18\x03 \x01(\t\"<\n\x16\x43ollectTopologyRequest\x12\x0f\n\x07visited\x18\x01 \x03(\t\x12\x11\n\tmax_depth\x18\x02 \x01(\x05\"\x8e\x02\n\x08Topology\x12\x30\n\x05nodes\x18\x01 \x03(\x0b\x32!.node_service.Topology.NodesEntry\x12\x39\n\npeer_graph\x18\x02 \x03(\x0b\x32%.node_service.Topology.PeerGraphEntry\x1aN\n\nNodesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12/\n\x05value\x18\x02 \x01(\x0b\x32 .node_service.DeviceCapabilities:\x02\x38\x01\x1a\x45\n\x0ePeerGraphEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\"\n\x05value\x18\x02 \x01(\x0b\x32\x13.node_service.Peers:\x02\x38\x01\"\x19\n\x05Peers\x12\x10\n\x08peer_ids\x18\x01 \x03(\t\"7\n\x0b\x44\x65viceFlops\x12\x0c\n\x04\x66p32\x18\x01 \x01(\x02\x12\x0c\n\x04\x66p16\x18\x02 \x01(\x02\x12\x0c\n\x04int8\x18\x03 \x01(\x02\"k\n\x12\x44\x65viceCapabilities\x12\r\n\x05model\x18\x01 \x01(\t\x12\x0c\n\x04\x63hip\x18\x02 \x01(\t\x12\x0e\n\x06memory\x18\x03 \x01(\x05\x12(\n\x05\x66lops\x18\x04 \x01(\x0b\x32\x19.node_service.DeviceFlops\"L\n\x11SendResultRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x0e\n\x06result\x18\x02 \x03(\x05\x12\x13\n\x0bis_finished\x18\x03 \x01(\x08\"=\n\x17SendOpaqueStatusRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 
\x01(\t\"\x07\n\x05\x45mpty2\xde\x03\n\x0bNodeService\x12\x41\n\nSendPrompt\x12\x1b.node_service.PromptRequest\x1a\x14.node_service.Tensor\"\x00\x12\x41\n\nSendTensor\x12\x1b.node_service.TensorRequest\x1a\x14.node_service.Tensor\"\x00\x12^\n\x12GetInferenceResult\x12\'.node_service.GetInferenceResultRequest\x1a\x1d.node_service.InferenceResult\"\x00\x12Q\n\x0f\x43ollectTopology\x12$.node_service.CollectTopologyRequest\x1a\x16.node_service.Topology\"\x00\x12\x44\n\nSendResult\x12\x1f.node_service.SendResultRequest\x1a\x13.node_service.Empty\"\x00\x12P\n\x10SendOpaqueStatus\x12%.node_service.SendOpaqueStatusRequest\x1a\x13.node_service.Empty\"\x00\x62\x06proto3' +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'node_service_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + DESCRIPTOR._loaded_options = None + _globals['_TOPOLOGY_NODESENTRY']._loaded_options = None + _globals['_TOPOLOGY_NODESENTRY']._serialized_options = b'8\001' + _globals['_TOPOLOGY_PEERGRAPHENTRY']._loaded_options = None + _globals['_TOPOLOGY_PEERGRAPHENTRY']._serialized_options = b'8\001' + _globals['_SHARD']._serialized_start = 36 + _globals['_SHARD']._serialized_end = 119 + _globals['_PROMPTREQUEST']._serialized_start = 122 + _globals['_PROMPTREQUEST']._serialized_end = 317 + _globals['_TENSORREQUEST']._serialized_start = 320 + _globals['_TENSORREQUEST']._serialized_end = 499 + _globals['_GETINFERENCERESULTREQUEST']._serialized_start = 501 + _globals['_GETINFERENCERESULTREQUEST']._serialized_end = 548 + _globals['_INFERENCERESULT']._serialized_start = 550 + _globals['_INFERENCERESULT']._serialized_end = 642 + _globals['_TENSOR']._serialized_start = 644 + _globals['_TENSOR']._serialized_end = 703 + _globals['_COLLECTTOPOLOGYREQUEST']._serialized_start = 705 + _globals['_COLLECTTOPOLOGYREQUEST']._serialized_end = 765 + _globals['_TOPOLOGY']._serialized_start = 768 + _globals['_TOPOLOGY']._serialized_end = 1038 + _globals['_TOPOLOGY_NODESENTRY']._serialized_start = 889 + _globals['_TOPOLOGY_NODESENTRY']._serialized_end = 967 + _globals['_TOPOLOGY_PEERGRAPHENTRY']._serialized_start = 969 + _globals['_TOPOLOGY_PEERGRAPHENTRY']._serialized_end = 1038 + _globals['_PEERS']._serialized_start = 1040 + _globals['_PEERS']._serialized_end = 1065 + _globals['_DEVICEFLOPS']._serialized_start = 1067 + _globals['_DEVICEFLOPS']._serialized_end = 1122 + _globals['_DEVICECAPABILITIES']._serialized_start = 1124 + _globals['_DEVICECAPABILITIES']._serialized_end = 1231 + _globals['_SENDRESULTREQUEST']._serialized_start = 1233 + _globals['_SENDRESULTREQUEST']._serialized_end = 1309 + _globals['_SENDOPAQUESTATUSREQUEST']._serialized_start = 1311 + _globals['_SENDOPAQUESTATUSREQUEST']._serialized_end = 1372 + _globals['_EMPTY']._serialized_start = 1374 + _globals['_EMPTY']._serialized_end = 1381 + _globals['_NODESERVICE']._serialized_start = 1384 + _globals['_NODESERVICE']._serialized_end = 1862 +# @@protoc_insertion_point(module_scope) diff --git a/build/lib/exo/networking/grpc/node_service_pb2_grpc.py b/build/lib/exo/networking/grpc/node_service_pb2_grpc.py new file mode 100644 index 000000000..ea1d3c98f --- /dev/null +++ b/build/lib/exo/networking/grpc/node_service_pb2_grpc.py @@ -0,0 +1,272 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc +import warnings + +from . 
import node_service_pb2 as node__service__pb2 + +GRPC_GENERATED_VERSION = '1.64.1' +GRPC_VERSION = grpc.__version__ +EXPECTED_ERROR_RELEASE = '1.65.0' +SCHEDULED_RELEASE_DATE = 'June 25, 2024' +_version_not_supported = False + +try: + from grpc._utilities import first_version_is_lower + _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) +except ImportError: + _version_not_supported = True + +if _version_not_supported: + warnings.warn( + f'The grpc package installed is at version {GRPC_VERSION},' + f' but the generated code in node_service_pb2_grpc.py depends on' + f' grpcio>={GRPC_GENERATED_VERSION}.' + + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' + + f' This warning will become an error in {EXPECTED_ERROR_RELEASE},' + f' scheduled for release on {SCHEDULED_RELEASE_DATE}.', RuntimeWarning + ) + + +class NodeServiceStub(object): + """Missing associated documentation comment in .proto file.""" + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.SendPrompt = channel.unary_unary( + '/node_service.NodeService/SendPrompt', + request_serializer=node__service__pb2.PromptRequest.SerializeToString, + response_deserializer=node__service__pb2.Tensor.FromString, + _registered_method=True + ) + self.SendTensor = channel.unary_unary( + '/node_service.NodeService/SendTensor', + request_serializer=node__service__pb2.TensorRequest.SerializeToString, + response_deserializer=node__service__pb2.Tensor.FromString, + _registered_method=True + ) + self.GetInferenceResult = channel.unary_unary( + '/node_service.NodeService/GetInferenceResult', + request_serializer=node__service__pb2.GetInferenceResultRequest.SerializeToString, + response_deserializer=node__service__pb2.InferenceResult.FromString, + _registered_method=True + ) + self.CollectTopology = channel.unary_unary( + '/node_service.NodeService/CollectTopology', + request_serializer=node__service__pb2.CollectTopologyRequest.SerializeToString, + response_deserializer=node__service__pb2.Topology.FromString, + _registered_method=True + ) + self.SendResult = channel.unary_unary( + '/node_service.NodeService/SendResult', + request_serializer=node__service__pb2.SendResultRequest.SerializeToString, + response_deserializer=node__service__pb2.Empty.FromString, + _registered_method=True + ) + self.SendOpaqueStatus = channel.unary_unary( + '/node_service.NodeService/SendOpaqueStatus', + request_serializer=node__service__pb2.SendOpaqueStatusRequest.SerializeToString, + response_deserializer=node__service__pb2.Empty.FromString, + _registered_method=True + ) + + +class NodeServiceServicer(object): + """Missing associated documentation comment in .proto file.""" + def SendPrompt(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def SendTensor(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def GetInferenceResult(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + 
raise NotImplementedError('Method not implemented!') + + def CollectTopology(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def SendResult(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def SendOpaqueStatus(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_NodeServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + 'SendPrompt': + grpc.unary_unary_rpc_method_handler( + servicer.SendPrompt, + request_deserializer=node__service__pb2.PromptRequest.FromString, + response_serializer=node__service__pb2.Tensor.SerializeToString, + ), + 'SendTensor': + grpc.unary_unary_rpc_method_handler( + servicer.SendTensor, + request_deserializer=node__service__pb2.TensorRequest.FromString, + response_serializer=node__service__pb2.Tensor.SerializeToString, + ), + 'GetInferenceResult': + grpc.unary_unary_rpc_method_handler( + servicer.GetInferenceResult, + request_deserializer=node__service__pb2.GetInferenceResultRequest.FromString, + response_serializer=node__service__pb2.InferenceResult.SerializeToString, + ), + 'CollectTopology': + grpc.unary_unary_rpc_method_handler( + servicer.CollectTopology, + request_deserializer=node__service__pb2.CollectTopologyRequest.FromString, + response_serializer=node__service__pb2.Topology.SerializeToString, + ), + 'SendResult': + grpc.unary_unary_rpc_method_handler( + servicer.SendResult, + request_deserializer=node__service__pb2.SendResultRequest.FromString, + response_serializer=node__service__pb2.Empty.SerializeToString, + ), + 'SendOpaqueStatus': + grpc.unary_unary_rpc_method_handler( + servicer.SendOpaqueStatus, + request_deserializer=node__service__pb2.SendOpaqueStatusRequest.FromString, + response_serializer=node__service__pb2.Empty.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler('node_service.NodeService', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + server.add_registered_method_handlers('node_service.NodeService', rpc_method_handlers) + + +# This class is part of an EXPERIMENTAL API. 
+class NodeService(object): + """Missing associated documentation comment in .proto file.""" + @staticmethod + def SendPrompt(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/node_service.NodeService/SendPrompt', + node__service__pb2.PromptRequest.SerializeToString, + node__service__pb2.Tensor.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True + ) + + @staticmethod + def SendTensor(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/node_service.NodeService/SendTensor', + node__service__pb2.TensorRequest.SerializeToString, + node__service__pb2.Tensor.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True + ) + + @staticmethod + def GetInferenceResult(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/node_service.NodeService/GetInferenceResult', + node__service__pb2.GetInferenceResultRequest.SerializeToString, + node__service__pb2.InferenceResult.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True + ) + + @staticmethod + def CollectTopology(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/node_service.NodeService/CollectTopology', + node__service__pb2.CollectTopologyRequest.SerializeToString, + node__service__pb2.Topology.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True + ) + + @staticmethod + def SendResult(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/node_service.NodeService/SendResult', + node__service__pb2.SendResultRequest.SerializeToString, + node__service__pb2.Empty.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True + ) + + @staticmethod + def SendOpaqueStatus(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/node_service.NodeService/SendOpaqueStatus', + node__service__pb2.SendOpaqueStatusRequest.SerializeToString, + node__service__pb2.Empty.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True + ) diff --git a/build/lib/exo/networking/grpc/test_grpc_discovery.py 
b/build/lib/exo/networking/grpc/test_grpc_discovery.py new file mode 100644 index 000000000..13372bbb4 --- /dev/null +++ b/build/lib/exo/networking/grpc/test_grpc_discovery.py @@ -0,0 +1,22 @@ +import asyncio +import unittest +from .grpc_discovery import GRPCDiscovery + + +class TestGRPCDiscovery(unittest.IsolatedAsyncioTestCase): + async def asyncSetUp(self): + self.node1 = GRPCDiscovery("node1", 50051, 5678, 5679) + self.node2 = GRPCDiscovery("node2", 50052, 5679, 5678) + await self.node1.start() + await self.node2.start() + + async def asyncTearDown(self): + await self.node1.stop() + await self.node2.stop() + + async def test_discovery(self): + await asyncio.sleep(4) + + # Check discovered peers + print("Node1 Peers:", ", ".join([f"{peer_id}: {peer}" for peer_id, peer in self.node1.known_peers.items()])) + print("Node2 Peers:", ", ".join([f"{peer_id}: {peer}" for peer_id, peer in self.node2.known_peers.items()])) diff --git a/build/lib/exo/networking/peer_handle.py b/build/lib/exo/networking/peer_handle.py new file mode 100644 index 000000000..cf232d006 --- /dev/null +++ b/build/lib/exo/networking/peer_handle.py @@ -0,0 +1,48 @@ +from abc import ABC, abstractmethod +from typing import Optional, Tuple, List +import numpy as np +from exo.inference.shard import Shard +from exo.topology.device_capabilities import DeviceCapabilities +from exo.topology.topology import Topology + + +class PeerHandle(ABC): + @abstractmethod + def id(self) -> str: + pass + + @abstractmethod + def device_capabilities(self) -> DeviceCapabilities: + pass + + @abstractmethod + async def connect(self) -> None: + pass + + @abstractmethod + async def is_connected(self) -> bool: + pass + + @abstractmethod + async def disconnect(self) -> None: + pass + + @abstractmethod + async def send_prompt(self, shard: Shard, prompt: str, image_str: Optional[str] = None, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.array]: + pass + + @abstractmethod + async def send_tensor(self, shard: Shard, tensor: np.array, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.array]: + pass + + @abstractmethod + async def get_inference_result(self, request_id: str) -> Tuple[Optional[np.ndarray], bool]: + pass + + @abstractmethod + async def collect_topology(self, visited: set[str], max_depth: int) -> Topology: + pass + + @abstractmethod + async def send_result(self, request_id: str, result: List[int], is_finished: bool) -> None: + pass diff --git a/build/lib/exo/networking/server.py b/build/lib/exo/networking/server.py new file mode 100644 index 000000000..8e7f9812f --- /dev/null +++ b/build/lib/exo/networking/server.py @@ -0,0 +1,11 @@ +from abc import ABC, abstractmethod + + +class Server(ABC): + @abstractmethod + async def start(self) -> None: + pass + + @abstractmethod + async def stop(self) -> None: + pass diff --git a/build/lib/exo/orchestration/__init__.py b/build/lib/exo/orchestration/__init__.py new file mode 100644 index 000000000..478af5370 --- /dev/null +++ b/build/lib/exo/orchestration/__init__.py @@ -0,0 +1,4 @@ +from .node import Node +from .standard_node import StandardNode + +__all__ = ["Node", "StandardNode"] diff --git a/build/lib/exo/orchestration/node.py b/build/lib/exo/orchestration/node.py new file mode 100644 index 000000000..60b729748 --- /dev/null +++ b/build/lib/exo/orchestration/node.py @@ -0,0 +1,47 @@ +from typing import Optional, Tuple, List +import numpy as np +from abc import ABC, abstractmethod +from exo.helpers import 
AsyncCallbackSystem +from exo.inference.shard import Shard +from exo.topology.topology import Topology + + +class Node(ABC): + @abstractmethod + async def start(self, wait_for_peers: int = 0) -> None: + pass + + @abstractmethod + async def stop(self) -> None: + pass + + @abstractmethod + async def process_prompt(self, shard: Shard, prompt: str, image_str: Optional[str] = None, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.ndarray]: + pass + + @abstractmethod + async def process_tensor(self, shard: Shard, tensor: np.ndarray, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.ndarray]: + pass + + @abstractmethod + async def get_inference_result(self, request_id: str) -> Tuple[Optional[np.ndarray], bool]: + pass + + @abstractmethod + async def collect_topology(self, visited: set[str] = set(), max_depth: int = 2) -> Topology: + pass + + @property + @abstractmethod + def current_topology(self) -> Topology: + pass + + @property + @abstractmethod + def on_token(self) -> AsyncCallbackSystem[str, Tuple[str, List[int], bool]]: + pass + + @property + @abstractmethod + def on_opaque_status(self) -> AsyncCallbackSystem[str, Tuple[str, str]]: + pass diff --git a/build/lib/exo/orchestration/standard_node.py b/build/lib/exo/orchestration/standard_node.py new file mode 100644 index 000000000..b968b6597 --- /dev/null +++ b/build/lib/exo/orchestration/standard_node.py @@ -0,0 +1,385 @@ +import numpy as np +import json +import asyncio +import uuid +import time +import traceback +from typing import List, Dict, Optional, Tuple, Union +from exo.networking import Discovery, PeerHandle, Server +from exo.inference.inference_engine import InferenceEngine, Shard +from .node import Node +from exo.topology.topology import Topology +from exo.topology.device_capabilities import device_capabilities +from exo.topology.partitioning_strategy import Partition, PartitioningStrategy, map_partitions_to_shards +from exo import DEBUG +from exo.helpers import AsyncCallbackSystem +from exo.viz.topology_viz import TopologyViz +from exo.download.hf.hf_helpers import RepoProgressEvent + + +class StandardNode(Node): + def __init__( + self, + _id: str, + server: Server, + inference_engine: InferenceEngine, + discovery: Discovery, + partitioning_strategy: PartitioningStrategy = None, + max_generate_tokens: int = 1024, + chatgpt_api_endpoints: List[str] = [], + web_chat_urls: List[str] = [], + disable_tui: Optional[bool] = False, + topology_viz: Optional[TopologyViz] = None, + ): + self.id = _id + self.inference_engine = inference_engine + self.server = server + self.discovery = discovery + self.partitioning_strategy = partitioning_strategy + self.peers: List[PeerHandle] = {} + self.topology: Topology = Topology() + self.device_capabilities = device_capabilities() + self.buffered_token_output: Dict[str, Tuple[List[int], bool]] = {} + self.max_generate_tokens = max_generate_tokens + self.topology_viz = topology_viz + self._on_token = AsyncCallbackSystem[str, Tuple[str, List[int], bool]]() + self._on_opaque_status = AsyncCallbackSystem[str, Tuple[str, str]]() + self._on_opaque_status.register("node_status").on_next(self.on_node_status) + self.node_download_progress: Dict[str, RepoProgressEvent] = {} + + async def start(self, wait_for_peers: int = 0) -> None: + await self.server.start() + await self.discovery.start() + await self.update_peers(wait_for_peers) + await self.collect_topology() + if DEBUG >= 2: print(f"Collected topology: {self.topology}") + 
asyncio.create_task(self.periodic_topology_collection(5)) + + async def stop(self) -> None: + await self.discovery.stop() + await self.server.stop() + + def on_node_status(self, request_id, opaque_status): + try: + status_data = json.loads(opaque_status) + if status_data.get("type", "") == "node_status": + if status_data.get("status", "").startswith("start_"): + self.current_topology.active_node_id = status_data.get("node_id") + elif status_data.get("status", "").startswith("end_"): + if status_data.get("node_id") == self.current_topology.active_node_id: + self.current_topology.active_node_id = None + download_progress = None + if status_data.get("type", "") == "download_progress": + if DEBUG >= 5: print(f"Download progress from {status_data.get('node_id')}: {status_data.get('progress')}") + download_progress = RepoProgressEvent.from_dict(status_data.get('progress')) + self.node_download_progress[status_data.get('node_id')] = download_progress + if self.topology_viz: + self.topology_viz.update_visualization(self.current_topology, self.partitioning_strategy.partition(self.current_topology), self.id, self.node_download_progress) + except Exception as e: + if DEBUG >= 1: print(f"Error updating visualization: {e}") + if DEBUG >= 1: traceback.print_exc() + + async def process_prompt(self, base_shard: Shard, prompt: str, image_str: Optional[str] = None, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.ndarray]: + shard = self.get_current_shard(base_shard) + asyncio.create_task( + self.broadcast_opaque_status( + request_id, + json.dumps({ + "type": "node_status", + "node_id": self.id, + "status": "start_process_prompt", + "base_shard": base_shard.to_dict(), + "shard": shard.to_dict(), + "prompt": prompt, + "image_str": image_str, + "inference_state": inference_state, + "request_id": request_id, + }), + ) + ) + start_time = time.perf_counter_ns() + resp = await self._process_prompt(base_shard, prompt, image_str, request_id, inference_state) + end_time = time.perf_counter_ns() + elapsed_time_ns = end_time - start_time + asyncio.create_task( + self.broadcast_opaque_status( + request_id, + json.dumps({ + "type": "node_status", + "node_id": self.id, + "status": "end_process_prompt", + "base_shard": base_shard.to_dict(), + "shard": shard.to_dict(), + "prompt": prompt, + "image_str": image_str, + "inference_state": inference_state, + "request_id": request_id, + "elapsed_time_ns": elapsed_time_ns, + "result_size": resp.size if resp is not None else 0, + }), + ) + ) + return resp + + async def _process_prompt(self, base_shard: Shard, prompt: str, image_str: Optional[str] = None, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.ndarray]: + if request_id is None: + request_id = str(uuid.uuid4()) + if request_id not in self.buffered_token_output: + self.buffered_token_output[request_id] = ([], False) + shard = self.get_current_shard(base_shard) + + if DEBUG >= 2: print(f"[{request_id}] process prompt: {base_shard=} {shard=} {prompt=} {image_str=}") + if shard.start_layer != 0: + if DEBUG >= 2: print(f"[{request_id}] forwarding to next shard: {base_shard=} {shard=} {prompt=} {image_str=}") + await self.forward_to_next_shard(shard, prompt, request_id, image_str=image_str, inference_state=inference_state) + return + + result, inference_state, is_finished = await self.inference_engine.infer_prompt(request_id, shard, prompt, image_str, inference_state=inference_state) + is_finished = is_finished or 
len(self.buffered_token_output[request_id][0]) >= self.max_generate_tokens + if is_finished: + self.buffered_token_output[request_id] = (self.buffered_token_output[request_id][0], True) + asyncio.create_task(self.broadcast_result(request_id, self.buffered_token_output[request_id][0], is_finished)) # TODO: this is n^2 communication complexity + + if result.size == 1: + self.buffered_token_output[request_id][0].append(result.item()) + self.trigger_on_token_callbacks(request_id, self.buffered_token_output[request_id][0], is_finished) + + if DEBUG >= 2: print(f"[{request_id}] result size: {result.size}, is finished: {is_finished}, buffered tokens: {len(self.buffered_token_output[request_id][0])}") + + if not is_finished: + asyncio.create_task(self.forward_to_next_shard(shard, result, request_id, image_str=image_str, inference_state=inference_state)) + + return np.array(self.buffered_token_output[request_id][0]) if len(self.buffered_token_output[request_id][0]) > 0 else None + + async def process_tensor( + self, + base_shard: Shard, + tensor: np.ndarray, + request_id: Optional[str] = None, + inference_state: Optional[str] = None, + ) -> Optional[np.ndarray]: + shard = self.get_current_shard(base_shard) + asyncio.create_task( + self.broadcast_opaque_status( + request_id, + json.dumps({ + "type": "node_status", + "node_id": self.id, + "status": "start_process_tensor", + "base_shard": base_shard.to_dict(), + "shard": shard.to_dict(), + "tensor_size": tensor.size, + "tensor_shape": tensor.shape, + "request_id": request_id, + "inference_state": inference_state, + }), + ) + ) + start_time = time.perf_counter_ns() + resp = await self._process_tensor(shard, tensor, request_id, inference_state) + end_time = time.perf_counter_ns() + elapsed_time_ns = end_time - start_time + asyncio.create_task( + self.broadcast_opaque_status( + request_id, + json.dumps({ + "type": "node_status", + "node_id": self.id, + "status": "end_process_tensor", + "base_shard": base_shard.to_dict(), + "shard": shard.to_dict(), + "request_id": request_id, + "elapsed_time_ns": elapsed_time_ns, + "result_size": resp.size if resp is not None else 0, + }), + ) + ) + return resp + + async def _process_tensor( + self, + base_shard: Shard, + tensor: np.ndarray, + request_id: Optional[str] = None, + inference_state: Optional[str] = None, + ) -> Optional[np.ndarray]: + if request_id is None: + request_id = str(uuid.uuid4()) + if request_id not in self.buffered_token_output: + self.buffered_token_output[request_id] = ([], False) + shard = self.get_current_shard(base_shard) + + try: + if DEBUG >= 1: print(f"[{request_id}] process_tensor: {tensor.size=} {tensor.shape=}") + result, inference_state, is_finished = await self.inference_engine.infer_tensor(request_id, shard, tensor, inference_state=inference_state) + is_finished = is_finished or len(self.buffered_token_output[request_id][0]) >= self.max_generate_tokens + if is_finished: + self.buffered_token_output[request_id] = (self.buffered_token_output[request_id][0], True) + asyncio.create_task(self.broadcast_result(request_id, self.buffered_token_output[request_id][0], is_finished)) # TODO: this is n^2 communication complexity + + if result.size == 1: # we got a new token out + self.buffered_token_output[request_id][0].append(result.item()) + self.trigger_on_token_callbacks(request_id, self.buffered_token_output[request_id][0], is_finished) + if DEBUG >= 2: print(f"[{request_id}] result size: {result.size}, is finished: {is_finished}, buffered tokens: 
{len(self.buffered_token_output[request_id][0])}") + + if not is_finished: + asyncio.create_task(self.forward_to_next_shard(shard, result, request_id, inference_state=inference_state)) + + return np.array(self.buffered_token_output[request_id][0]) if len(self.buffered_token_output[request_id][0]) > 0 else None + except Exception as e: + print(f"Error processing tensor for shard {shard}: {e}") + traceback.print_exc() + return None + + async def forward_to_next_shard( + self, + base_shard: Shard, + tensor_or_prompt: Union[np.ndarray, str], + request_id: str, + image_str: Optional[str] = None, + inference_state: Optional[str] = None, + ) -> None: + if not self.partitioning_strategy: + if DEBUG >= 1: print("No partitioning strategy found. Skipping forward.") + return + shard = self.get_current_shard(base_shard) + + partitions = self.partitioning_strategy.partition(self.topology) + shards = map_partitions_to_shards(self.partitioning_strategy.partition(self.topology), base_shard.n_layers, base_shard.model_id) + current_partition_index = next((i for i, p in enumerate(partitions) if p.node_id == self.id), None) + if DEBUG >= 1: print(f"Current partition index: {current_partition_index}") + if current_partition_index is not None: + next_partition_index = (current_partition_index+1) % len(partitions) + next_partition: Partition = partitions[next_partition_index] + next_shard = shards[next_partition_index] + if DEBUG >= 2: print(f"Computed next from: {shard}, {self.topology}. Next partition: {next_partition}") + + if next_partition.node_id == self.id: + if isinstance(tensor_or_prompt, np.ndarray): + await self.process_tensor(shard, tensor_or_prompt, request_id, inference_state=inference_state) + else: + await self.process_prompt(shard, tensor_or_prompt, image_str, request_id, inference_state=inference_state) + return + + target_peer = next((p for p in self.peers if p.id() == next_partition.node_id), None) + if not target_peer: + raise ValueError(f"Peer for {next_partition} not found") + + if DEBUG >= 1: print(f"Sending tensor_or_prompt to {target_peer.id()}: {tensor_or_prompt}") + + if isinstance(tensor_or_prompt, np.ndarray): + await target_peer.send_tensor(next_shard, tensor_or_prompt, request_id=request_id, inference_state=inference_state) + else: + await target_peer.send_prompt(next_shard, tensor_or_prompt, image_str=image_str, request_id=request_id, inference_state=inference_state) + + def get_current_shard(self, base_shard: Shard) -> Shard: + partitions = self.partitioning_strategy.partition(self.topology) + shards = map_partitions_to_shards(partitions, base_shard.n_layers, base_shard.model_id) + current_partition_index = next((i for i, p in enumerate(partitions) if p.node_id == self.id), None) + if current_partition_index is None: + raise ValueError(f"No current partition found for node: {self.id}") + return shards[current_partition_index] + + async def update_peers(self, wait_for_peers: int = 0) -> None: + self.peers = await self.discovery.discover_peers(wait_for_peers) + for peer in self.peers: + is_connected = await peer.is_connected() + if DEBUG >= 2 and is_connected: + print(f"Already connected to {peer.id()}: {is_connected}") + if not is_connected: + if DEBUG >= 2: print(f"Connecting to {peer.id()}...") + await peer.connect() + if DEBUG >= 1: print(f"Connected to peer {peer.device_capabilities()} ({peer.id()=})") + + async def periodic_topology_collection(self, interval: int): + while True: + await asyncio.sleep(interval) + try: + await self.update_peers() + await self.collect_topology() 
+ except Exception as e: + print(f"Error collecting topology: {e}") + traceback.print_exc() + + async def get_inference_result(self, request_id: str) -> Tuple[Optional[np.ndarray], bool]: + if request_id not in self.buffered_token_output: + return None, False + return np.array(self.buffered_token_output[request_id][0]), self.buffered_token_output[request_id][1] + + async def collect_topology(self, visited: set[str] = set(), max_depth: int = 4) -> Topology: + next_topology = Topology() + next_topology.update_node(self.id, self.device_capabilities) + + if DEBUG >= 2: print(f"Collecting topology {max_depth=} {visited=}") + + prev_visited = visited.copy() + # TODO: should we add our own peer id here? + visited.update(p.id() for p in self.peers) + + for peer in self.peers: + next_topology.update_node(peer.id(), peer.device_capabilities()) + next_topology.add_edge(self.id, peer.id()) + + if peer.id() in prev_visited: + continue + + if max_depth <= 0: + if DEBUG >= 2: print("Max depth reached. Skipping...") + continue + + try: + other_topology = await peer.collect_topology(visited, max_depth=max_depth - 1) + if DEBUG >= 2: print(f"Collected topology from: {peer.id()}: {other_topology}") + self.topology.merge(other_topology) + except Exception as e: + print(f"Error collecting topology from {peer.id()}: {e}") + + next_topology.active_node_id = self.topology.active_node_id # this is not so clean. + self.topology = next_topology + if self.topology_viz: + self.topology_viz.update_visualization(self.current_topology, self.partitioning_strategy.partition(self.current_topology), self.id) + return next_topology + + @property + def on_token(self) -> AsyncCallbackSystem[str, Tuple[str, List[int], bool]]: + return self._on_token + + @property + def on_opaque_status(self) -> AsyncCallbackSystem[str, Tuple[str, str]]: + return self._on_opaque_status + + def trigger_on_token_callbacks(self, request_id: str, tokens: List[int], is_finished: bool) -> None: + if DEBUG >= 2: print(f"Triggering all on_token callbacks with {request_id=} num_tokens={len(tokens)} {is_finished=}") + self.on_token.trigger_all(request_id, tokens, is_finished) + + async def broadcast_result(self, request_id: str, result: List[int], is_finished: bool) -> None: + async def send_result_to_peer(peer): + try: + await asyncio.wait_for(peer.send_result(request_id, result, is_finished), timeout=15.0) + except asyncio.TimeoutError: + print(f"Timeout broadcasting result to {peer.id()}") + except Exception as e: + print(f"Error broadcasting result to {peer.id()}: {e}") + traceback.print_exc() + + await asyncio.gather(*[send_result_to_peer(peer) for peer in self.peers], return_exceptions=True) + + async def broadcast_opaque_status(self, request_id: str, status: str) -> None: + if DEBUG >= 5: print(f"Broadcasting opaque status: {request_id=} {status=}") + + async def send_status_to_peer(peer): + try: + await asyncio.wait_for(peer.send_opaque_status(request_id, status), timeout=15.0) + except asyncio.TimeoutError: + print(f"Timeout sending opaque status to {peer.id()}") + except Exception as e: + print(f"Error sending opaque status to {peer.id()}: {e}") + traceback.print_exc() + + await asyncio.gather(*[send_status_to_peer(peer) for peer in self.peers], return_exceptions=True) + # in the case of opaque status, we also want to receive our own opaque statuses + self.on_opaque_status.trigger_all(request_id, status) + + @property + def current_topology(self) -> Topology: + return self.topology diff --git a/build/lib/exo/orchestration/test_node.py 
b/build/lib/exo/orchestration/test_node.py new file mode 100644 index 000000000..230ef0cf6 --- /dev/null +++ b/build/lib/exo/orchestration/test_node.py @@ -0,0 +1,57 @@ +import unittest +from unittest.mock import Mock, AsyncMock +import numpy as np + +from .standard_node import StandardNode +from exo.networking.peer_handle import PeerHandle + + +class TestNode(unittest.IsolatedAsyncioTestCase): + def setUp(self): + self.mock_inference_engine = AsyncMock() + self.mock_server = AsyncMock() + self.mock_server.start = AsyncMock() + self.mock_server.stop = AsyncMock() + self.mock_discovery = AsyncMock() + self.mock_discovery.start = AsyncMock() + self.mock_discovery.stop = AsyncMock() + mock_peer1 = Mock(spec=PeerHandle) + mock_peer1.id.return_value = "peer1" + mock_peer2 = Mock(spec=PeerHandle) + mock_peer2.id.return_value = "peer2" + self.mock_discovery.discover_peers = AsyncMock(return_value=[mock_peer1, mock_peer2]) + + self.node = StandardNode("test_node", self.mock_server, self.mock_inference_engine, "localhost", 50051, self.mock_discovery) + + async def asyncSetUp(self): + await self.node.start() + + async def asyncTearDown(self): + await self.node.stop() + + async def test_node_initialization(self): + self.assertEqual(self.node.node_id, "test_node") + self.assertEqual(self.node.host, "localhost") + self.assertEqual(self.node.port, 50051) + + async def test_node_start(self): + self.mock_server.start.assert_called_once_with("localhost", 50051) + + async def test_node_stop(self): + await self.node.stop() + self.mock_server.stop.assert_called_once() + + async def test_discover_and_connect_to_peers(self): + await self.node.discover_and_connect_to_peers() + self.assertEqual(len(self.node.peers), 2) + self.assertIn("peer1", map(lambda p: p.id(), self.node.peers)) + self.assertIn("peer2", map(lambda p: p.id(), self.node.peers)) + + async def test_process_tensor_calls_inference_engine(self): + mock_peer = Mock() + self.node.peers = [mock_peer] + + input_tensor = np.array([69, 1, 2]) + await self.node.process_tensor(input_tensor, None) + + self.node.inference_engine.process_shard.assert_called_once_with(input_tensor) diff --git a/build/lib/exo/stats/__init__.py b/build/lib/exo/stats/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/build/lib/exo/stats/metrics.py b/build/lib/exo/stats/metrics.py new file mode 100644 index 000000000..f29533ff7 --- /dev/null +++ b/build/lib/exo/stats/metrics.py @@ -0,0 +1,29 @@ +from exo.orchestration import Node +from prometheus_client import start_http_server, Counter, Histogram +import json + +# Create metrics to track time spent and requests made. 
+PROCESS_PROMPT_COUNTER = Counter("process_prompt_total", "Total number of prompts processed", ["node_id"]) +PROCESS_TENSOR_COUNTER = Counter("process_tensor_total", "Total number of tensors processed", ["node_id"]) +PROCESS_TENSOR_TIME = Histogram("process_tensor_seconds", "Time spent processing tensor", ["node_id"]) + + +def start_metrics_server(node: Node, port: int): + start_http_server(port) + + def _on_opaque_status(request_id, opaque_status: str): + status_data = json.loads(opaque_status) + _type = status_data.get("type", "") + node_id = status_data.get("node_id", "") + if _type != "node_status": + return + status = status_data.get("status", "") + + if status == "end_process_prompt": + PROCESS_PROMPT_COUNTER.labels(node_id=node_id).inc() + elif status == "end_process_tensor": + elapsed_time_ns = status_data.get("elapsed_time_ns", 0) + PROCESS_TENSOR_COUNTER.labels(node_id=node_id).inc() + PROCESS_TENSOR_TIME.labels(node_id=node_id).observe(elapsed_time_ns/1e9) # Convert ns to seconds + + node.on_opaque_status.register("stats").on_next(_on_opaque_status) diff --git a/build/lib/exo/test_callbacks.py b/build/lib/exo/test_callbacks.py new file mode 100644 index 000000000..c10083d6e --- /dev/null +++ b/build/lib/exo/test_callbacks.py @@ -0,0 +1,50 @@ +import asyncio +from typing import Any, Callable +from exo.helpers import AsyncCallbackSystem, AsyncCallback + + +# Usage example +async def main() -> None: + callback_system = AsyncCallbackSystem[str, Any]() + + # Register callbacks + callback1 = callback_system.register("callback1") + callback2 = callback_system.register("callback2") + + def on_next_callback(name: str) -> Callable[..., None]: + def callback(*args: Any) -> None: + print(f"{name} received values: {args}") + + return callback + + callback1.on_next(on_next_callback("Callback1")) + callback2.on_next(on_next_callback("Callback2")) + + async def wait_for_callback(name: str, callback: AsyncCallback[Any], condition: Callable[..., bool]) -> None: + try: + result = await callback.wait(condition, timeout=2) + print(f"{name} wait completed with result: {result}") + except asyncio.TimeoutError: + print(f"{name} wait timed out") + + # Trigger all callbacks at once + callback_system.trigger_all("Hello", 42, True) + + # Wait for all callbacks with different conditions + await asyncio.gather( + wait_for_callback("Callback1", callback1, lambda msg, num, flag: isinstance(msg, str) and num > 0), + wait_for_callback("Callback2", callback2, lambda msg, num, flag: flag is True), + ) + + # Trigger individual callback + callback_system.trigger("callback2", "World", -10, False) + + # Demonstrate timeout + new_callback = callback_system.register("new_callback") + new_callback.on_next(on_next_callback("NewCallback")) + await wait_for_callback("NewCallback", new_callback, lambda msg, num, flag: num > 100) + + callback_system.trigger("callback2", "World", 200, False) + + +asyncio.run(main()) diff --git a/build/lib/exo/topology/__init__.py b/build/lib/exo/topology/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/build/lib/exo/topology/device_capabilities.py b/build/lib/exo/topology/device_capabilities.py new file mode 100644 index 000000000..51db53ef2 --- /dev/null +++ b/build/lib/exo/topology/device_capabilities.py @@ -0,0 +1,207 @@ +from exo import DEBUG +from dataclasses import dataclass, asdict +import subprocess +import psutil + +TFLOPS = 1.00 + + +@dataclass +class DeviceFlops: + # units of TFLOPS + fp32: float + fp16: float + int8: float + + def __str__(self): + return 
f"fp32: {self.fp32 / TFLOPS:.2f} TFLOPS, fp16: {self.fp16 / TFLOPS:.2f} TFLOPS, int8: {self.int8 / TFLOPS:.2f} TFLOPS" + + def to_dict(self): + return asdict(self) + + +@dataclass +class DeviceCapabilities: + model: str + chip: str + memory: int + flops: DeviceFlops + + def __str__(self): + return f"Model: {self.model}. Chip: {self.chip}. Memory: {self.memory}MB. Flops: {self.flops}" + + def __post_init__(self): + if isinstance(self.flops, dict): + self.flops = DeviceFlops(**self.flops) + + def to_dict(self): + return {"model": self.model, "chip": self.chip, "memory": self.memory, "flops": self.flops.to_dict()} + + +UNKNOWN_DEVICE_CAPABILITIES = DeviceCapabilities(model="Unknown Model", chip="Unknown Chip", memory=0, flops=DeviceFlops(fp32=0, fp16=0, int8=0)) + +CHIP_FLOPS = { + # Source: https://www.cpu-monkey.com + # Note: currently no distinction between variants of M3 Max and M3 Pro, we pick the lower one to be conservative + ### M chips + "Apple M1": DeviceFlops(fp32=2.29*TFLOPS, fp16=4.58*TFLOPS, int8=9.16*TFLOPS), + "Apple M1 Pro": DeviceFlops(fp32=5.30*TFLOPS, fp16=10.60*TFLOPS, int8=21.20*TFLOPS), + "Apple M1 Max": DeviceFlops(fp32=10.60*TFLOPS, fp16=21.20*TFLOPS, int8=42.40*TFLOPS), + "Apple M1 Ultra": DeviceFlops(fp32=21.20*TFLOPS, fp16=42.40*TFLOPS, int8=84.80*TFLOPS), + "Apple M2": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS), + "Apple M2 Pro": DeviceFlops(fp32=5.68*TFLOPS, fp16=11.36*TFLOPS, int8=22.72*TFLOPS), + "Apple M2 Max": DeviceFlops(fp32=13.49*TFLOPS, fp16=26.98*TFLOPS, int8=53.96*TFLOPS), + "Apple M2 Ultra": DeviceFlops(fp32=26.98*TFLOPS, fp16=53.96*TFLOPS, int8=107.92*TFLOPS), + "Apple M3": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS), + "Apple M3 Max": DeviceFlops(fp32=14.20*TFLOPS, fp16=28.40*TFLOPS, int8=56.80*TFLOPS), + "Apple M3 Pro": DeviceFlops(fp32=4.97*TFLOPS, fp16=9.94*TFLOPS, int8=19.88*TFLOPS), + "Apple M4": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS), + ### A chips + "Apple A13 Bionic": DeviceFlops(fp32=0.69*TFLOPS, fp16=1.38*TFLOPS, int8=2.76*TFLOPS), + "Apple A14 Bionic": DeviceFlops(fp32=0.75*TFLOPS, fp16=1.50*TFLOPS, int8=3.00*TFLOPS), + "Apple A15 Bionic": DeviceFlops(fp32=1.37*TFLOPS, fp16=2.74*TFLOPS, int8=5.48*TFLOPS), + "Apple A16 Bionic": DeviceFlops(fp32=1.79*TFLOPS, fp16=3.58*TFLOPS, int8=7.16*TFLOPS), + "Apple A17 Pro": DeviceFlops(fp32=2.15*TFLOPS, fp16=4.30*TFLOPS, int8=8.60*TFLOPS), + ### NVIDIA GPUs + # RTX 40 series + "NVIDIA GEFORCE RTX 4090": DeviceFlops(fp32=82.58*TFLOPS, fp16=165.16*TFLOPS, int8=330.32*TFLOPS), + "NVIDIA GEFORCE RTX 4080": DeviceFlops(fp32=48.74*TFLOPS, fp16=97.48*TFLOPS, int8=194.96*TFLOPS), + "NVIDIA GEFORCE RTX 4080 SUPER": DeviceFlops(fp32=52.0*TFLOPS, fp16=104.0*TFLOPS, int8=208.0*TFLOPS), + "NVIDIA GEFORCE RTX 4070 TI SUPER": DeviceFlops(fp32=40.0*TFLOPS, fp16=80.0*TFLOPS, int8=160.0*TFLOPS), + "NVIDIA GEFORCE RTX 4070 TI": DeviceFlops(fp32=39.43*TFLOPS, fp16=78.86*TFLOPS, int8=157.72*TFLOPS), + "NVIDIA GEFORCE RTX 4070 SUPER": DeviceFlops(fp32=30.0*TFLOPS, fp16=60.0*TFLOPS, int8=120.0*TFLOPS), + "NVIDIA GEFORCE RTX 4070": DeviceFlops(fp32=29.0*TFLOPS, fp16=58.0*TFLOPS, int8=116.0*TFLOPS), + "NVIDIA GEFORCE RTX 4060 TI 16GB": DeviceFlops(fp32=22.0*TFLOPS, fp16=44.0*TFLOPS, int8=88.0*TFLOPS), + # RTX 30 series + "NVIDIA GEFORCE RTX 3050": DeviceFlops(fp32=9.11*TFLOPS, fp16=18.22*TFLOPS, int8=36.44*TFLOPS), + "NVIDIA GEFORCE RTX 3060": DeviceFlops(fp32=13.0*TFLOPS, fp16=26.0*TFLOPS, int8=52.0*TFLOPS), + "NVIDIA GEFORCE RTX 3060 TI": 
DeviceFlops(fp32=16.2*TFLOPS, fp16=32.4*TFLOPS, int8=64.8*TFLOPS), + "NVIDIA GEFORCE RTX 3070": DeviceFlops(fp32=20.3*TFLOPS, fp16=40.6*TFLOPS, int8=81.2*TFLOPS), + "NVIDIA GEFORCE RTX 3070 TI": DeviceFlops(fp32=21.8*TFLOPS, fp16=43.6*TFLOPS, int8=87.2*TFLOPS), + "NVIDIA GEFORCE RTX 3080 (10 GB)": DeviceFlops(fp32=29.8*TFLOPS, fp16=59.6*TFLOPS, int8=119.2*TFLOPS), + "NVIDIA GEFORCE RTX 3080 (12 GB)": DeviceFlops(fp32=30.6*TFLOPS, fp16=61.2*TFLOPS, int8=122.4*TFLOPS), + "NVIDIA GEFORCE RTX 3080 TI": DeviceFlops(fp32=34.1*TFLOPS, fp16=68.2*TFLOPS, int8=136.4*TFLOPS), + "NVIDIA GEFORCE RTX 3090": DeviceFlops(fp32=35.6*TFLOPS, fp16=71.2*TFLOPS, int8=142.4*TFLOPS), + "NVIDIA GEFORCE RTX 3090 TI": DeviceFlops(fp32=40.0*TFLOPS, fp16=80.0*TFLOPS, int8=160.0*TFLOPS), + # RTX 20 series + "NVIDIA GEFORCE RTX 2060": DeviceFlops(fp32=6.45*TFLOPS, fp16=12.9*TFLOPS, int8=25.8*TFLOPS), + "NVIDIA GEFORCE RTX 2060 SUPER": DeviceFlops(fp32=7.2*TFLOPS, fp16=14.4*TFLOPS, int8=28.8*TFLOPS), + "NVIDIA GEFORCE RTX 2070": DeviceFlops(fp32=7.46*TFLOPS, fp16=14.93*TFLOPS, int8=29.86*TFLOPS), + "NVIDIA GEFORCE RTX 2070 SUPER": DeviceFlops(fp32=9.06*TFLOPS, fp16=18.12*TFLOPS, int8=36.24*TFLOPS), + "NVIDIA GEFORCE RTX 2080": DeviceFlops(fp32=10.07*TFLOPS, fp16=20.14*TFLOPS, int8=40.28*TFLOPS), + "NVIDIA GEFORCE RTX 2080 SUPER": DeviceFlops(fp32=11.15*TFLOPS, fp16=22.30*TFLOPS, int8=44.60*TFLOPS), + "NVIDIA TITAN RTX": DeviceFlops(fp32=16.31*TFLOPS, fp16=32.62*TFLOPS, int8=65.24*TFLOPS), + # QUATRO RTX Ampere series + "NVIDIA QUATRO RTX A2000": DeviceFlops(fp32=7.99*TFLOPS, fp16=7.99*TFLOPS, int8=31.91*TFLOPS), + "NVIDIA QUATRO RTX A4000": DeviceFlops(fp32=19.17*TFLOPS, fp16=19.17*TFLOPS, int8=76.68*TFLOPS), + "NVIDIA QUATRO RTX A4500": DeviceFlops(fp32=23.65*TFLOPS, fp16=23.65*TFLOPS, int8=94.6*TFLOPS), + "NVIDIA QUATRO RTX A5000": DeviceFlops(fp32=27.8*TFLOPS, fp16=27.8*TFLOPS, int8=111.2*TFLOPS), + "NVIDIA QUATRO RTX A6000": DeviceFlops(fp32=38.71*TFLOPS, fp16=38.71*TFLOPS, int8=154.84*TFLOPS), + # Common Server GPUs + "NVIDIA A40 48GB PCIE": DeviceFlops(fp32=37.4*TFLOPS, fp16=149.7*TFLOPS, int8=299.3*TFLOPS), + "NVIDIA A100 40GB PCIE": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), + "NVIDIA A800 40GB PCIE": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), + "NVIDIA A100 80GB PCIE": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), + "NVIDIA A800 80GB PCIE": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), + "NVIDIA A100 80GB SXM": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), + "NVIDIA A800 80GB SXM": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), + "NVIDIA T1000 8GB": DeviceFlops(fp32=2.5 * TFLOPS, fp16=5.0 * TFLOPS, int8=10.0 * TFLOPS), + "Quadro M2000": DeviceFlops(fp32=0.5 * TFLOPS, fp16=1.0 * TFLOPS, int8=2.0 * TFLOPS), + "Quadro P400": DeviceFlops(fp32=0.641 * TFLOPS, fp16=1.282 * TFLOPS, int8=2.564 * TFLOPS), + # ... add more devices if needed ... 
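+  # Note: most entries above scale as fp16 = 2x fp32 and int8 = 2x fp16; the RTX A-series
+  # and the A40/A100/A800 server parts appear to carry their separately published
+  # (tensor) fp16/int8 throughput instead.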
+ ### AMD GPUs + # RX 6000 series + "AMD Radeon RX 6900 XT": DeviceFlops(fp32=23.04*TFLOPS, fp16=46.08*TFLOPS, int8=92.16*TFLOPS), + "AMD Radeon RX 6800 XT": DeviceFlops(fp32=20.74*TFLOPS, fp16=41.48*TFLOPS, int8=82.96*TFLOPS), + "AMD Radeon RX 6800": DeviceFlops(fp32=16.17*TFLOPS, fp16=32.34*TFLOPS, int8=64.68*TFLOPS), + "AMD Radeon RX 6700 XT": DeviceFlops(fp32=13.21*TFLOPS, fp16=26.42*TFLOPS, int8=52.84*TFLOPS), + "AMD Radeon RX 6700": DeviceFlops(fp32=11.4*TFLOPS, fp16=22.8*TFLOPS, int8=45.6*TFLOPS), + "AMD Radeon RX 6600 XT": DeviceFlops(fp32=10.6*TFLOPS, fp16=21.2*TFLOPS, int8=42.4*TFLOPS), + "AMD Radeon RX 6600": DeviceFlops(fp32=8.93*TFLOPS, fp16=17.86*TFLOPS, int8=35.72*TFLOPS), + "AMD Radeon RX 6500 XT": DeviceFlops(fp32=5.77*TFLOPS, fp16=11.54*TFLOPS, int8=23.08*TFLOPS), + "AMD Radeon RX 6400": DeviceFlops(fp32=3.57*TFLOPS, fp16=7.14*TFLOPS, int8=14.28*TFLOPS), + # RX 7000 series + "AMD Radeon RX 7900 XTX": DeviceFlops(fp32=61.4*TFLOPS, fp16=122.8*TFLOPS, int8=245.6*TFLOPS), + "AMD Radeon RX 7900 XT": DeviceFlops(fp32=53.4*TFLOPS, fp16=106.8*TFLOPS, int8=213.6*TFLOPS), + "AMD Radeon RX 7800 XT": DeviceFlops(fp32=42.6*TFLOPS, fp16=85.2*TFLOPS, int8=170.4*TFLOPS), + "AMD Radeon RX 7700 XT": DeviceFlops(fp32=34.2*TFLOPS, fp16=68.4*TFLOPS, int8=136.8*TFLOPS), + "AMD Radeon RX 7600": DeviceFlops(fp32=21.5*TFLOPS, fp16=43.0*TFLOPS, int8=86.0*TFLOPS), + "AMD Radeon RX 7500": DeviceFlops(fp32=16.2*TFLOPS, fp16=32.4*TFLOPS, int8=64.8*TFLOPS), + ### Qualcomm embedded chips: TODO +} +CHIP_FLOPS.update({f"LAPTOP GPU {key}": value for key, value in CHIP_FLOPS.items()}) +CHIP_FLOPS.update({f"Laptop GPU {key}": value for key, value in CHIP_FLOPS.items()}) +CHIP_FLOPS.update({f"{key} LAPTOP GPU": value for key, value in CHIP_FLOPS.items()}) +CHIP_FLOPS.update({f"{key} Laptop GPU": value for key, value in CHIP_FLOPS.items()}) + + +def device_capabilities() -> DeviceCapabilities: + if psutil.MACOS: + return mac_device_capabilities() + elif psutil.LINUX: + return linux_device_capabilities() + else: + return DeviceCapabilities( + model="Unknown Device", + chip="Unknown Chip", + memory=psutil.virtual_memory().total // 2**20, + flops=DeviceFlops(fp32=0, fp16=0, int8=0), + ) + + +def mac_device_capabilities() -> DeviceCapabilities: + # Fetch the model of the Mac using system_profiler + model = subprocess.check_output(["system_profiler", "SPHardwareDataType"]).decode("utf-8") + model_line = next((line for line in model.split("\n") if "Model Name" in line), None) + model_id = model_line.split(": ")[1] if model_line else "Unknown Model" + chip_line = next((line for line in model.split("\n") if "Chip" in line), None) + chip_id = chip_line.split(": ")[1] if chip_line else "Unknown Chip" + memory_line = next((line for line in model.split("\n") if "Memory" in line), None) + memory_str = memory_line.split(": ")[1] if memory_line else "Unknown Memory" + memory_units = memory_str.split() + memory_value = int(memory_units[0]) + if memory_units[1] == "GB": + memory = memory_value*1024 + else: + memory = memory_value + + # Assuming static values for other attributes for demonstration + return DeviceCapabilities(model=model_id, chip=chip_id, memory=memory, flops=CHIP_FLOPS.get(chip_id, DeviceFlops(fp32=0, fp16=0, int8=0))) + + +def linux_device_capabilities() -> DeviceCapabilities: + import psutil + from tinygrad import Device + + if DEBUG >= 2: print(f"tinygrad {Device.DEFAULT=}") + if Device.DEFAULT == "CUDA" or Device.DEFAULT == "NV" or Device.DEFAULT == "GPU": + import pynvml + + pynvml.nvmlInit() + handle = 
pynvml.nvmlDeviceGetHandleByIndex(0) + gpu_name = pynvml.nvmlDeviceGetName(handle).upper() + gpu_memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle) + + if DEBUG >= 2: print(f"NVIDIA device {gpu_name=} {gpu_memory_info=}") + + return DeviceCapabilities( + model=f"Linux Box ({gpu_name})", + chip=gpu_name, + memory=gpu_memory_info.total // 2**20, + flops=CHIP_FLOPS.get(gpu_name, DeviceFlops(fp32=0, fp16=0, int8=0)), + ) + elif Device.DEFAULT == "AMD": + # TODO AMD support + return DeviceCapabilities( + model="Linux Box (AMD)", + chip="Unknown AMD", + memory=psutil.virtual_memory().total // 2**20, + flops=DeviceFlops(fp32=0, fp16=0, int8=0), + ) + else: + return DeviceCapabilities( + model=f"Linux Box (Device: {Device.DEFAULT})", + chip=f"Unknown Chip (Device: {Device.DEFAULT})", + memory=psutil.virtual_memory().total // 2**20, + flops=DeviceFlops(fp32=0, fp16=0, int8=0), + ) diff --git a/build/lib/exo/topology/partitioning_strategy.py b/build/lib/exo/topology/partitioning_strategy.py new file mode 100644 index 000000000..29c3dc6a9 --- /dev/null +++ b/build/lib/exo/topology/partitioning_strategy.py @@ -0,0 +1,40 @@ +from abc import ABC, abstractmethod +from typing import List +from dataclasses import dataclass +from .topology import Topology +from exo.inference.shard import Shard + + +# Partitions shard-space into pieces of contiguous shards, represented by floating point range [start, end) between 0 and 1 +@dataclass +class Partition: + node_id: str + start: float + end: float + + +class PartitioningStrategy(ABC): + @abstractmethod + def partition(self, topology: Topology) -> List[Partition]: + pass + + +def map_partitions_to_shards(partitions: List[Partition], num_layers: int, model_id: str) -> List[Shard]: + shards = [] + for i, partition in enumerate(partitions): + start_layer = int(partition.start*num_layers) + end_layer = int(partition.end*num_layers) - 1 + + # Ensure the last partition covers up to num_layers - 1 + if i == len(partitions) - 1: + end_layer = num_layers - 1 + + # Ensure no empty shards + if start_layer <= end_layer: + shards.append(Shard(model_id, start_layer, end_layer, num_layers)) + + # Ensure full coverage + if shards and shards[-1].end_layer < num_layers - 1: + shards[-1] = Shard(model_id, shards[-1].start_layer, num_layers - 1, num_layers) + + return shards diff --git a/build/lib/exo/topology/ring_memory_weighted_partitioning_strategy.py b/build/lib/exo/topology/ring_memory_weighted_partitioning_strategy.py new file mode 100644 index 000000000..6550aeb19 --- /dev/null +++ b/build/lib/exo/topology/ring_memory_weighted_partitioning_strategy.py @@ -0,0 +1,18 @@ +from typing import List +from .partitioning_strategy import PartitioningStrategy +from .topology import Topology +from .partitioning_strategy import Partition + + +class RingMemoryWeightedPartitioningStrategy(PartitioningStrategy): + def partition(self, topology: Topology) -> List[Partition]: + nodes = list(topology.all_nodes()) + nodes.sort(key=lambda x: (x[1].memory, x[0]), reverse=True) + total_memory = sum(node[1].memory for node in nodes) + partitions = [] + start = 0 + for node in nodes: + end = round(start + (node[1].memory/total_memory), 5) + partitions.append(Partition(node[0], start, end)) + start = end + return partitions diff --git a/build/lib/exo/topology/test_device_capabilities.py b/build/lib/exo/topology/test_device_capabilities.py new file mode 100644 index 000000000..5f8b4c3ac --- /dev/null +++ b/build/lib/exo/topology/test_device_capabilities.py @@ -0,0 +1,91 @@ +import unittest +from 
unittest.mock import patch +from exo.topology.device_capabilities import mac_device_capabilities, DeviceCapabilities, DeviceFlops, TFLOPS + + +class TestMacDeviceCapabilities(unittest.TestCase): + @patch("subprocess.check_output") + def test_mac_device_capabilities_pro(self, mock_check_output): + # Mock the subprocess output + mock_check_output.return_value = b""" +Hardware: + +Hardware Overview: + +Model Name: MacBook Pro +Model Identifier: Mac15,9 +Model Number: Z1CM000EFB/A +Chip: Apple M3 Max +Total Number of Cores: 16 (12 performance and 4 efficiency) +Memory: 128 GB +System Firmware Version: 10000.000.0 +OS Loader Version: 10000.000.0 +Serial Number (system): XXXXXXXXXX +Hardware UUID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX +Provisioning UDID: XXXXXXXX-XXXXXXXXXXXXXXXX +Activation Lock Status: Enabled +""" + + # Call the function + result = mac_device_capabilities() + + # Check the results + self.assertIsInstance(result, DeviceCapabilities) + self.assertEqual(result.model, "MacBook Pro") + self.assertEqual(result.chip, "Apple M3 Max") + self.assertEqual(result.memory, 131072) # 128 GB in MB + self.assertEqual( + str(result), + "Model: MacBook Pro. Chip: Apple M3 Max. Memory: 131072MB. Flops: 14.20 TFLOPS, fp16: 28.40 TFLOPS, int8: 56.80 TFLOPS", + ) + + @patch("subprocess.check_output") + def test_mac_device_capabilities_air(self, mock_check_output): + # Mock the subprocess output + mock_check_output.return_value = b""" +Hardware: + +Hardware Overview: + +Model Name: MacBook Air +Model Identifier: Mac14,2 +Model Number: MLY33B/A +Chip: Apple M2 +Total Number of Cores: 8 (4 performance and 4 efficiency) +Memory: 8 GB +System Firmware Version: 10000.00.0 +OS Loader Version: 10000.00.0 +Serial Number (system): XXXXXXXXXX +Hardware UUID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX +Provisioning UDID: XXXXXXXX-XXXXXXXXXXXXXXXX +Activation Lock Status: Disabled +""" + + # Call the function + result = mac_device_capabilities() + + # Check the results + self.assertIsInstance(result, DeviceCapabilities) + self.assertEqual(result.model, "MacBook Air") + self.assertEqual(result.chip, "Apple M2") + self.assertEqual(result.memory, 8192) # 8 GB in MB + + @unittest.skip("Unskip this test when running on a MacBook Pro, Apple M3 Max, 128GB") + def test_mac_device_capabilities_real(self): + # Call the function without mocking + result = mac_device_capabilities() + + # Check the results + self.assertIsInstance(result, DeviceCapabilities) + self.assertEqual(result.model, "MacBook Pro") + self.assertEqual(result.chip, "Apple M3 Max") + self.assertEqual(result.memory, 131072) # 128 GB in MB + self.assertEqual(result.flops, DeviceFlops(fp32=14.20*TFLOPS, fp16=28.40*TFLOPS, int8=56.80*TFLOPS)) + self.assertEqual( + str(result), + "Model: MacBook Pro. Chip: Apple M3 Max. Memory: 131072MB. 
Flops: 14.20 TFLOPS, fp16: 28.40 TFLOPS, int8: 56.80 TFLOPS", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/build/lib/exo/topology/test_map_partitions.py b/build/lib/exo/topology/test_map_partitions.py new file mode 100644 index 000000000..5254915e6 --- /dev/null +++ b/build/lib/exo/topology/test_map_partitions.py @@ -0,0 +1,81 @@ +import unittest +from typing import List +from exo.topology.partitioning_strategy import Partition, map_partitions_to_shards +from exo.inference.shard import Shard + + +class TestRingMemoryWeightedPartitioningStrategy(unittest.TestCase): + def test_map_partitions_to_shards(self): + partitions = [ + Partition("node1", 0.0, 0.42857), + Partition("node2", 0.42857, 0.71428), + Partition("node3", 0.71428, 0.99999), + ] + shards = map_partitions_to_shards(partitions, 32, "model") + self.assertEqual( + shards, + [ + Shard("model", 0, 12, 32), + Shard("model", 13, 21, 32), + Shard("model", 22, 31, 32), + ], + ) + + partitions = [ + Partition("node1", 0.0, 0.1), + Partition("node2", 0.1, 0.2), + Partition("node3", 0.2, 1.0), + ] + shards = map_partitions_to_shards(partitions, 32, "model") + self.assertEqual( + shards, + [ + Shard("model", 0, 2, 32), + Shard("model", 3, 5, 32), + Shard("model", 6, 31, 32), + ], + ) + + partitions = [ + Partition("node1", 0.0, 1.0), + ] + shards = map_partitions_to_shards(partitions, 32, "model") + self.assertEqual( + shards, + [ + Shard("model", 0, 31, 32), + ], + ) + + partitions = [] + shards = map_partitions_to_shards(partitions, 32, "model") + self.assertEqual(shards, []) + + def test_broken_map_partitions_to_shards(self): + # this was an old broken implementation that sometimes had rounding errors! + def _broken_map_partitions_to_shards(partitions: List[Partition], num_layers, model_id: str): + shards = [] + for i, partition in enumerate(partitions): + start_layer = int(partition.start*num_layers) + end_layer = int(partition.end*num_layers) - 1 + shards.append(Shard(model_id, start_layer, end_layer, num_layers)) + return shards + + partitions = [ + Partition("node1", 0.0, 0.42857), + Partition("node2", 0.42857, 0.71428), + Partition("node3", 0.71428, 0.99999), + ] + shards = _broken_map_partitions_to_shards(partitions, 32, "model") + self.assertEqual( + shards, + [ + Shard("model", 0, 12, 32), + Shard("model", 13, 21, 32), + Shard("model", 22, 30, 32), + ], + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/build/lib/exo/topology/test_ring_memory_weighted_partitioning_strategy.py b/build/lib/exo/topology/test_ring_memory_weighted_partitioning_strategy.py new file mode 100644 index 000000000..fd466f367 --- /dev/null +++ b/build/lib/exo/topology/test_ring_memory_weighted_partitioning_strategy.py @@ -0,0 +1,90 @@ +import unittest +from exo.topology.ring_memory_weighted_partitioning_strategy import RingMemoryWeightedPartitioningStrategy +from exo.topology.topology import Topology +from exo.topology.device_capabilities import DeviceCapabilities, DeviceFlops +from exo.topology.partitioning_strategy import Partition + + +class TestRingMemoryWeightedPartitioningStrategy(unittest.TestCase): + def test_partition(self): + # triangle + # node1 -> node2 -> node3 -> node1 + topology = Topology() + topology.update_node( + "node1", + DeviceCapabilities(model="test1", chip="test1", memory=3000, flops=DeviceFlops(fp32=0, fp16=0, int8=0)), + ) + topology.update_node( + "node2", + DeviceCapabilities(model="test2", chip="test2", memory=1000, flops=DeviceFlops(fp32=0, fp16=0, int8=0)), + ) + topology.update_node( + 
"node3", + DeviceCapabilities(model="test3", chip="test3", memory=6000, flops=DeviceFlops(fp32=0, fp16=0, int8=0)), + ) + topology.add_edge("node1", "node2") + topology.add_edge("node2", "node3") + topology.add_edge("node3", "node1") + topology.add_edge("node1", "node3") + + strategy = RingMemoryWeightedPartitioningStrategy() + partitions = strategy.partition(topology) + + self.assertEqual(len(partitions), 3) + self.assertEqual( + partitions, + [ + Partition("node3", 0.0, 0.6), + Partition("node1", 0.6, 0.9), + Partition("node2", 0.9, 1.0), + ], + ) + + def test_partition_rounding(self): + # triangle + # node1 -> node2 -> node3 -> node1 + topology = Topology() + topology.update_node( + "node1", + DeviceCapabilities( + model="MacBook Pro", + chip="test1", + memory=128*1024*1024*1024, + flops=DeviceFlops(fp32=0, fp16=0, int8=0), + ), + ) + topology.update_node( + "node2", + DeviceCapabilities( + model="Mac Studio", + chip="test2", + memory=192*1024*1024*1024, + flops=DeviceFlops(fp32=0, fp16=0, int8=0), + ), + ) + topology.update_node( + "node3", + DeviceCapabilities( + model="MacBook Pro", + chip="test3", + memory=128*1024*1024*1024, + flops=DeviceFlops(fp32=0, fp16=0, int8=0), + ), + ) + + strategy = RingMemoryWeightedPartitioningStrategy() + partitions = strategy.partition(topology) + + self.assertEqual(len(partitions), 3) + self.assertEqual( + partitions, + [ + Partition("node3", 0.0, 0.42857), + Partition("node1", 0.6, 0.9), + Partition("node2", 0.9, 1.0), + ], + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/build/lib/exo/topology/topology.py b/build/lib/exo/topology/topology.py new file mode 100644 index 000000000..46b512e50 --- /dev/null +++ b/build/lib/exo/topology/topology.py @@ -0,0 +1,49 @@ +from .device_capabilities import DeviceCapabilities +from typing import Dict, Set, Optional + + +class Topology: + def __init__(self): + self.nodes: Dict[str, DeviceCapabilities] = {} # Maps node IDs to DeviceCapabilities + self.peer_graph: Dict[str, Set[str]] = {} # Adjacency list representing the graph + self.active_node_id: Optional[str] = None + + def update_node(self, node_id: str, device_capabilities: DeviceCapabilities): + self.nodes[node_id] = device_capabilities + + def get_node(self, node_id: str) -> DeviceCapabilities: + return self.nodes.get(node_id) + + def all_nodes(self): + return self.nodes.items() + + def add_edge(self, node1_id: str, node2_id: str): + if node1_id not in self.peer_graph: + self.peer_graph[node1_id] = set() + if node2_id not in self.peer_graph: + self.peer_graph[node2_id] = set() + self.peer_graph[node1_id].add(node2_id) + self.peer_graph[node2_id].add(node1_id) + + def get_neighbors(self, node_id: str) -> Set[str]: + return self.peer_graph.get(node_id, set()) + + def all_edges(self): + edges = [] + for node, neighbors in self.peer_graph.items(): + for neighbor in neighbors: + if (neighbor, node) not in edges: # Avoid duplicate edges + edges.append((node, neighbor)) + return edges + + def merge(self, other: "Topology"): + for node_id, capabilities in other.nodes.items(): + self.update_node(node_id, capabilities) + for node_id, neighbors in other.peer_graph.items(): + for neighbor in neighbors: + self.add_edge(node_id, neighbor) + + def __str__(self): + nodes_str = ", ".join(f"{node_id}: {cap}" for node_id, cap in self.nodes.items()) + edges_str = ", ".join(f"{node}: {neighbors}" for node, neighbors in self.peer_graph.items()) + return f"Topology(Nodes: {{{nodes_str}}}, Edges: {{{edges_str}}})" diff --git a/build/lib/exo/viz/__init__.py 
b/build/lib/exo/viz/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/build/lib/exo/viz/test_topology_viz.py b/build/lib/exo/viz/test_topology_viz.py new file mode 100644 index 000000000..e57de1ae3 --- /dev/null +++ b/build/lib/exo/viz/test_topology_viz.py @@ -0,0 +1,129 @@ +import asyncio +import unittest +from datetime import timedelta +from exo.viz.topology_viz import TopologyViz +from exo.topology.topology import Topology +from exo.topology.device_capabilities import DeviceCapabilities, DeviceFlops +from exo.topology.partitioning_strategy import Partition +from exo.download.hf.hf_helpers import RepoProgressEvent, RepoFileProgressEvent + + +def create_hf_repo_progress_event( + completed_files: int = 5, + total_files: int = 10, + downloaded_bytes: int = 500000000, + downloaded_bytes_this_session: int = 250000000, + total_bytes: int = 1000000000, + overall_speed: int = 5000000, + overall_eta: timedelta = timedelta(seconds=100), + file_progress: dict = None, + status: str = "in_progress" +) -> RepoProgressEvent: + if file_progress is None: + file_progress = { + "file1.bin": + RepoFileProgressEvent( + repo_id="repo_id", + repo_revision="repo_revision", + file_path="file1.bin", + downloaded=100000000, + downloaded_this_session=50000000, + total=200000000, + speed=1000000, + eta=timedelta(seconds=100), + status="in_progress" + ), "file2.bin": + RepoFileProgressEvent( + repo_id="repo_id", + repo_revision="repo_revision", + file_path="file2.bin", + downloaded=200000000, + downloaded_this_session=100000000, + total=200000000, + speed=2000000, + eta=timedelta(seconds=0), + status="complete" + ) + } + + return RepoProgressEvent( + repo_id="repo_id", + repo_revision="repo_revision", + completed_files=completed_files, + total_files=total_files, + downloaded_bytes=downloaded_bytes, + downloaded_bytes_this_session=downloaded_bytes_this_session, + total_bytes=total_bytes, + overall_speed=overall_speed, + overall_eta=overall_eta, + file_progress=file_progress, + status=status + ) + + +class TestNodeViz(unittest.IsolatedAsyncioTestCase): + async def asyncSetUp(self): + self.topology = Topology() + self.topology.update_node( + "node1", + DeviceCapabilities(model="ModelA", chip="ChipA", memory=8*1024, flops=DeviceFlops(fp32=1.0, fp16=2.0, int8=4.0)), + ) + self.topology.update_node( + "node2", + DeviceCapabilities(model="ModelB", chip="ChipB", memory=16*1024, flops=DeviceFlops(fp32=2.0, fp16=4.0, int8=8.0)), + ) + self.topology.update_node( + "node3", + DeviceCapabilities(model="ModelC", chip="ChipC", memory=32*1024, flops=DeviceFlops(fp32=4.0, fp16=8.0, int8=16.0)), + ) + self.topology.update_node( + "node4", + DeviceCapabilities(model="ModelD", chip="ChipD", memory=64*1024, flops=DeviceFlops(fp32=8.0, fp16=16.0, int8=32.0)), + ) + + self.top_viz = TopologyViz() + await asyncio.sleep(2) # Simulate running for a short time + + async def test_layout_generation(self): + # self.top_viz._generate_layout() + self.top_viz.refresh() + import time + + time.sleep(2) + self.top_viz.update_visualization( + self.topology, + [ + Partition("node1", 0, 0.2), + Partition("node4", 0.2, 0.4), + Partition("node2", 0.4, 0.8), + Partition("node3", 0.8, 0.9), + ], + "node1", + { + "node1": create_hf_repo_progress_event(), + "node2": create_hf_repo_progress_event(), + "node3": create_hf_repo_progress_event(), + "node4": create_hf_repo_progress_event(), + }, + ) + time.sleep(2) + self.topology.active_node_id = "node3" + self.top_viz.update_visualization( + self.topology, + [ + Partition("node1", 0, 0.3), + 
Partition("node5", 0.3, 0.5), + Partition("node2", 0.5, 0.7), + Partition("node4", 0.7, 0.9), + ], + "node5", + { + "node1": create_hf_repo_progress_event(), + "node5": create_hf_repo_progress_event(), + }, + ) + time.sleep(2) + + +if __name__ == "__main__": + unittest.main() diff --git a/build/lib/exo/viz/topology_viz.py b/build/lib/exo/viz/topology_viz.py new file mode 100644 index 000000000..3664f3783 --- /dev/null +++ b/build/lib/exo/viz/topology_viz.py @@ -0,0 +1,307 @@ +import math +from collections import OrderedDict +from typing import List, Optional, Tuple, Dict +from exo.helpers import exo_text, pretty_print_bytes, pretty_print_bytes_per_second +from exo.topology.topology import Topology +from exo.topology.partitioning_strategy import Partition +from exo.download.hf.hf_helpers import RepoProgressEvent +from exo.topology.device_capabilities import UNKNOWN_DEVICE_CAPABILITIES +from rich.console import Console, Group +from rich.text import Text +from rich.live import Live +from rich.style import Style +from rich.table import Table +from rich.layout import Layout +from rich.syntax import Syntax +from rich.panel import Panel +from rich.markdown import Markdown + + +class TopologyViz: + def __init__(self, chatgpt_api_endpoints: List[str] = [], web_chat_urls: List[str] = []): + self.chatgpt_api_endpoints = chatgpt_api_endpoints + self.web_chat_urls = web_chat_urls + self.topology = Topology() + self.partitions: List[Partition] = [] + self.node_id = None + self.node_download_progress: Dict[str, RepoProgressEvent] = {} + self.requests: OrderedDict[str, Tuple[str, str]] = {} + + self.console = Console() + self.layout = Layout() + self.layout.split(Layout(name="main"), Layout(name="prompt_output", size=15), Layout(name="download", size=25)) + self.main_panel = Panel(self._generate_main_layout(), title="Exo Cluster (0 nodes)", border_style="bright_yellow") + self.prompt_output_panel = Panel("", title="Prompt and Output", border_style="green") + self.download_panel = Panel("", title="Download Progress", border_style="cyan") + self.layout["main"].update(self.main_panel) + self.layout["prompt_output"].update(self.prompt_output_panel) + self.layout["download"].update(self.download_panel) + + # Initially hide the prompt_output panel + self.layout["prompt_output"].visible = False + self.live_panel = Live(self.layout, auto_refresh=False, console=self.console) + self.live_panel.start() + + def update_visualization(self, topology: Topology, partitions: List[Partition], node_id: Optional[str] = None, node_download_progress: Dict[str, RepoProgressEvent] = {}): + self.topology = topology + self.partitions = partitions + self.node_id = node_id + if node_download_progress: + self.node_download_progress = node_download_progress + self.refresh() + + def update_prompt(self, request_id: str, prompt: Optional[str] = None): + if request_id in self.requests: + self.requests[request_id] = [prompt, self.requests[request_id][1]] + else: + self.requests[request_id] = [prompt, ""] + self.refresh() + + def update_prompt_output(self, request_id: str, output: Optional[str] = None): + if request_id in self.requests: + self.requests[request_id] = [self.requests[request_id][0], output] + else: + self.requests[request_id] = ["", output] + self.refresh() + + def refresh(self): + self.main_panel.renderable = self._generate_main_layout() + # Update the panel title with the number of nodes and partitions + node_count = len(self.topology.nodes) + self.main_panel.title = f"Exo Cluster ({node_count} node{'s' if node_count != 1 
else ''})" + + # Update and show/hide prompt and output panel + if any(r[0] or r[1] for r in self.requests.values()): + self.prompt_output_panel = self._generate_prompt_output_layout() + self.layout["prompt_output"].update(self.prompt_output_panel) + self.layout["prompt_output"].visible = True + else: + self.layout["prompt_output"].visible = False + + # Only show download_panel if there are in-progress downloads + if any(progress.status == "in_progress" for progress in self.node_download_progress.values()): + self.download_panel.renderable = self._generate_download_layout() + self.layout["download"].visible = True + else: + self.layout["download"].visible = False + + self.live_panel.update(self.layout, refresh=True) + + def _generate_prompt_output_layout(self) -> Panel: + content = [] + requests = list(self.requests.values())[-3:] # Get the 3 most recent requests + max_width = self.console.width - 6 # Full width minus padding and icon + max_lines = 13 # Maximum number of lines for the entire panel content + + for (prompt, output) in reversed(requests): + prompt_icon, output_icon = "💬️", "🤖" + + # Process prompt + prompt_lines = prompt.split('\n') + if len(prompt_lines) > max_lines // 2: + prompt_lines = prompt_lines[:max_lines//2 - 1] + ['...'] + prompt_text = Text(f"{prompt_icon} ", style="bold bright_blue") + prompt_text.append('\n'.join(line[:max_width] for line in prompt_lines), style="white") + + # Process output + output_lines = output.split('\n') + remaining_lines = max_lines - len(prompt_lines) - 2 # -2 for spacing + if len(output_lines) > remaining_lines: + output_lines = output_lines[:remaining_lines - 1] + ['...'] + output_text = Text(f"\n{output_icon} ", style="bold bright_magenta") + output_text.append('\n'.join(line[:max_width] for line in output_lines), style="white") + + content.append(prompt_text) + content.append(output_text) + content.append(Text()) # Empty line between entries + + return Panel( + Group(*content), + title="", + border_style="cyan", + height=15, # Increased height to accommodate multiple lines + expand=True # Allow the panel to expand to full width + ) + + def _generate_main_layout(self) -> str: + # Calculate visualization parameters + num_partitions = len(self.partitions) + radius_x = 30 + radius_y = 12 + center_x, center_y = 50, 24 # Increased center_y to add more space + + # Generate visualization + visualization = [[" " for _ in range(100)] for _ in range(48)] # Increased height to 48 + + # Add exo_text at the top in bright yellow + exo_lines = exo_text.split("\n") + yellow_style = Style(color="bright_yellow") + max_line_length = max(len(line) for line in exo_lines) + for i, line in enumerate(exo_lines): + centered_line = line.center(max_line_length) + start_x = (100-max_line_length) // 2 + 15 + colored_line = Text(centered_line, style=yellow_style) + for j, char in enumerate(str(colored_line)): + if 0 <= start_x + j < 100 and i < len(visualization): + visualization[i][start_x + j] = char + + # Display chatgpt_api_endpoints and web_chat_urls + info_lines = [] + if len(self.web_chat_urls) > 0: + info_lines.append(f"Web Chat URL (tinychat): {' '.join(self.web_chat_urls[:1])}") + if len(self.chatgpt_api_endpoints) > 0: + info_lines.append(f"ChatGPT API endpoint: {' '.join(self.chatgpt_api_endpoints[:1])}") + + info_start_y = len(exo_lines) + 1 + for i, line in enumerate(info_lines): + start_x = (100 - len(line)) // 2 + 15 + for j, char in enumerate(line): + if 0 <= start_x + j < 100 and info_start_y + i < 48: + visualization[info_start_y + i][start_x + 
j] = char + + # Calculate total FLOPS and position on the bar + total_flops = sum(self.topology.nodes.get(partition.node_id, UNKNOWN_DEVICE_CAPABILITIES).flops.fp16 for partition in self.partitions) + bar_pos = (math.tanh(total_flops/20 - 2) + 1)/2 + + # Add GPU poor/rich bar + bar_width = 30 + bar_start_x = (100-bar_width) // 2 + bar_y = info_start_y + len(info_lines) + 1 + + # Create a gradient bar using emojis + gradient_bar = Text() + emojis = ["🟥", "🟧", "🟨", "🟩"] + for i in range(bar_width): + emoji_index = min(int(i/(bar_width/len(emojis))), len(emojis) - 1) + gradient_bar.append(emojis[emoji_index]) + + # Add the gradient bar to the visualization + visualization[bar_y][bar_start_x - 1] = "[" + visualization[bar_y][bar_start_x + bar_width] = "]" + for i, segment in enumerate(str(gradient_bar)): + visualization[bar_y][bar_start_x + i] = segment + + # Add labels + visualization[bar_y - 1][bar_start_x - 10:bar_start_x - 3] = "GPU poor" + visualization[bar_y - 1][bar_start_x + bar_width*2 + 2:bar_start_x + bar_width*2 + 11] = "GPU rich" + + # Add position indicator and FLOPS value + pos_x = bar_start_x + int(bar_pos*bar_width) + flops_str = f"{total_flops:.2f} TFLOPS" + visualization[bar_y - 1][pos_x] = "▼" + visualization[bar_y + 1][pos_x - len(flops_str) // 2:pos_x + len(flops_str) // 2 + len(flops_str) % 2] = flops_str + visualization[bar_y + 2][pos_x] = "▲" + + # Add an extra empty line for spacing + bar_y += 4 + + for i, partition in enumerate(self.partitions): + device_capabilities = self.topology.nodes.get(partition.node_id, UNKNOWN_DEVICE_CAPABILITIES) + + angle = 2*math.pi*i/num_partitions + x = int(center_x + radius_x*math.cos(angle)) + y = int(center_y + radius_y*math.sin(angle)) + + # Place node with different color for active node and this node + if partition.node_id == self.topology.active_node_id: + visualization[y][x] = "🔴" + elif partition.node_id == self.node_id: + visualization[y][x] = "🟢" + else: + visualization[y][x] = "🔵" + + # Place node info (model, memory, TFLOPS, partition) on three lines + node_info = [ + f"{device_capabilities.model} {device_capabilities.memory // 1024}GB", + f"{device_capabilities.flops.fp16}TFLOPS", + f"[{partition.start:.2f}-{partition.end:.2f}]", + ] + + # Calculate info position based on angle + info_distance_x = radius_x + 6 + info_distance_y = radius_y + 3 + info_x = int(center_x + info_distance_x*math.cos(angle)) + info_y = int(center_y + info_distance_y*math.sin(angle)) + + # Adjust text position to avoid overwriting the node icon and prevent cutoff + if info_x < x: + info_x = max(0, x - len(max(node_info, key=len)) - 1) + elif info_x > x: + info_x = min(99 - len(max(node_info, key=len)), info_x) + + # Adjust for top and bottom nodes + if 5*math.pi/4 < angle < 7*math.pi/4: + info_x += 4 + elif math.pi/4 < angle < 3*math.pi/4: + info_x += 3 + info_y -= 2 + + for j, line in enumerate(node_info): + for k, char in enumerate(line): + if 0 <= info_y + j < 48 and 0 <= info_x + k < 100: + if info_y + j != y or info_x + k != x: + visualization[info_y + j][info_x + k] = char + + # Draw line to next node + next_i = (i+1) % num_partitions + next_angle = 2*math.pi*next_i/num_partitions + next_x = int(center_x + radius_x*math.cos(next_angle)) + next_y = int(center_y + radius_y*math.sin(next_angle)) + + # Simple line drawing + steps = max(abs(next_x - x), abs(next_y - y)) + for step in range(1, steps): + line_x = int(x + (next_x-x)*step/steps) + line_y = int(y + (next_y-y)*step/steps) + if 0 <= line_y < 48 and 0 <= line_x < 100: + 
visualization[line_y][line_x] = "-" + + # Convert to string + return "\n".join("".join(str(char) for char in row) for row in visualization) + + def _generate_download_layout(self) -> Table: + summary = Table(show_header=False, box=None, padding=(0, 1), expand=True) + summary.add_column("Info", style="cyan", no_wrap=True, ratio=50) + summary.add_column("Progress", style="cyan", no_wrap=True, ratio=40) + summary.add_column("Percentage", style="cyan", no_wrap=True, ratio=10) + + # Current node download progress + if self.node_id in self.node_download_progress: + download_progress = self.node_download_progress[self.node_id] + title = f"Downloading model {download_progress.repo_id}@{download_progress.repo_revision} ({download_progress.completed_files}/{download_progress.total_files}):" + summary.add_row(Text(title, style="bold")) + progress_info = f"{pretty_print_bytes(download_progress.downloaded_bytes)} / {pretty_print_bytes(download_progress.total_bytes)} ({pretty_print_bytes_per_second(download_progress.overall_speed)})" + summary.add_row(progress_info) + + eta_info = f"{download_progress.overall_eta}" + summary.add_row(eta_info) + + summary.add_row("") # Empty row for spacing + + for file_path, file_progress in download_progress.file_progress.items(): + if file_progress.status != "complete": + progress = int(file_progress.downloaded/file_progress.total*30) + bar = f"[{'=' * progress}{' ' * (30 - progress)}]" + percentage = f"{file_progress.downloaded / file_progress.total * 100:.0f}%" + summary.add_row(Text(file_path[:30], style="cyan"), bar, percentage) + + summary.add_row("") # Empty row for spacing + + # Other nodes download progress summary + summary.add_row(Text("Other Nodes Download Progress:", style="bold")) + for node_id, progress in self.node_download_progress.items(): + if node_id != self.node_id: + device = self.topology.nodes.get(node_id) + partition = next((p for p in self.partitions if p.node_id == node_id), None) + partition_info = f"[{partition.start:.2f}-{partition.end:.2f}]" if partition else "" + percentage = progress.downloaded_bytes/progress.total_bytes*100 if progress.total_bytes > 0 else 0 + speed = pretty_print_bytes_per_second(progress.overall_speed) + device_info = f"{device.model if device else 'Unknown Device'} {device.memory // 1024 if device else '?'}GB {partition_info}" + progress_info = f"{progress.repo_id}@{progress.repo_revision} ({speed})" + progress_bar = f"[{'=' * int(percentage // 3.33)}{' ' * (30 - int(percentage // 3.33))}]" + percentage_str = f"{percentage:.1f}%" + eta_str = f"{progress.overall_eta}" + summary.add_row(device_info, progress_info, percentage_str) + summary.add_row("", progress_bar, eta_str) + + return summary From 0e9f42a283dff94e0a153a0c5db73a077f62e458 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 23 Nov 2024 23:02:34 -0900 Subject: [PATCH 492/589] fixing torchtune module issues --- exo/inference/inference_engine.py | 8 +++++++- exo/inference/torch/models/llama3.py | 3 +++ exo/inference/torch/models/llm_utils.py | 8 +++----- exo/inference/torch/pt_inference.py | 13 +++++++++++-- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/exo/inference/inference_engine.py b/exo/inference/inference_engine.py index d698d26fc..358297a2f 100644 --- a/exo/inference/inference_engine.py +++ b/exo/inference/inference_engine.py @@ -30,7 +30,13 @@ def get_inference_engine(inference_engine_name: str, shard_downloader: 'ShardDow tinygrad.helpers.DEBUG.value = int(os.getenv("TINYGRAD_DEBUG", default="0")) return 
TinygradDynamicShardInferenceEngine(shard_downloader) + elif inference_engine_name == "torch": + from exo.inference.torch.pt_inference import TorchDynamicShardInferenceEngine + return TorchDynamicShardInferenceEngine(shard_downloader) + elif inference_engine_name == "hf": + from exo.inference.torch.hf_inference import HFDynamicShardInferenceEngine + return HFDynamicShardInferenceEngine(shard_downloader) elif inference_engine_name == "dummy": from exo.inference.dummy_inference_engine import DummyInferenceEngine return DummyInferenceEngine() - raise ValueError(f"Unsupported inference engine: {inference_engine_name}") \ No newline at end of file + raise ValueError(f"Unsupported inference engine: {inference_engine_name}") diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index feef0baa8..750ca6d02 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -70,7 +70,9 @@ def setup_caches( else: self.decoder_max_cache_seq_len = self.max_seq_len + print(f"decoder max: {self.decoder_max_cache_seq_len}") for layer in self.layers: + print(f"setting cache for {layer} if not none") if layer is not None: layer.setup_caches( batch_size, @@ -300,6 +302,7 @@ def generate( print(self.model) if not self.model.caches_are_enabled() and self.use_cache: with self.device: + print("setting up cache") self.model.setup_caches( bsz, self.dtype, diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 9edd779aa..7bbe44a73 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -161,9 +161,9 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # paried_lmhead = False # get everything else except layers, embed_tokens and lm_head - if len(re.findall(r"model\.layers\..*", key)) == 0 and key != "model.embed_tokens.weight" and key != "lm_head.weight": + #if len(re.findall(r"model\.layers\..*", key)) == 0 and key != "model.embed_tokens.weight" and key != "lm_head.weight": # print(f"loading other weight: {key}") - remapped_state_dict[key] = value + #remapped_state_dict[key] = value # if paried_lmhead: # print(f"model.output.weight: {paried_embed_weight}") @@ -172,8 +172,6 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # print("\nRemapped state dict\n") # for rsdk in remapped_state_dict.keys(): # print(f"-- {rsdk}") - del state_dict - del full_state_dict model.load_state_dict(remapped_state_dict, strict=False) # if DEBUG >= 7: @@ -236,4 +234,4 @@ def forward(self, hidden_states): hidden_states = hidden_states.to(torch.float32) variance = hidden_states.pow(2).mean(-1, keepdim=True) hidden_states = hidden_states * torch.rsqrt(variance + self.eps) - return self.weight * hidden_states.to(input_dtype) \ No newline at end of file + return self.weight * hidden_states.to(input_dtype) diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index b5a1c8fd6..ba35b4f31 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -62,16 +62,19 @@ async def infer_prompt( await self.ensure_shard(shard) # tokenize + prompt_encode = self.tokenizer.encode(prompt, add_bos=True, add_eos=True) + print(f"prompt_encode {prompt_encode}") tokens = torch.tensor( - self.tokenizer.encode(prompt, add_bos=True, add_eos=True), + prompt_encode, dtype=torch.int ) + print(f"tokens: {tokens}") hidden_states = None # generate loop = asyncio.get_running_loop() with 
ThreadPoolExecutor() as pool: - hidden_states, logits, finished = await loop.run_in_executor( + result = await loop.run_in_executor( pool, functools.partial( self.sharded_model.generate, @@ -79,6 +82,10 @@ async def infer_prompt( ) ) + print(f"thread result: {result}") + + hidden_states, logits, finished = result[0], result[1], result[2] + if hidden_states is not None: return hidden_states.numpy(force=True), "", finished else: @@ -128,3 +135,5 @@ async def ensure_shard(self, shard: Shard): shard, self.sharded_model ) + + print("shard ensured\n") From a170cc67152a1287b7c2e295c9654e8bbb71f64b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 23 Nov 2024 23:03:36 -0900 Subject: [PATCH 493/589] adding torchtune install --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 263445efc..33977614a 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,8 @@ "uuid==1.30", "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@232edcfd4f8b388807c64fb1817a7668ce27cbad", "torch==2.4.0", - "accelerate==0.34.2" + "accelerate==0.34.2", + "torchtune==0.4.0" ] extras_require = { From ff786883a38c1bdd6bd5c4af7dcce0fb6f094477 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 23 Nov 2024 23:09:15 -0900 Subject: [PATCH 494/589] adding torchao install --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 33977614a..147c01ce2 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,8 @@ "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@232edcfd4f8b388807c64fb1817a7668ce27cbad", "torch==2.4.0", "accelerate==0.34.2", - "torchtune==0.4.0" + "torchtune==0.4.0", + "torchao==0.6.1" ] extras_require = { From 9f57e45a1de09f4ddc2a7b185a1aba1b70582582 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 04:41:32 -0900 Subject: [PATCH 495/589] building out test inference engine for pytorch, adding torch engine to model selection only for llama models, adding needed modules to setup.py, changed out using llamatokenizer to base resolve_tokenizer --- exo/inference/torch/models/llama3.py | 67 +++----- exo/inference/torch/pt_inference.py | 149 ++++++++++-------- .../torch/tests/test_pt_inference_engine.py | 38 ++--- exo/models.py | 6 +- setup.py | 4 +- 5 files changed, 128 insertions(+), 136 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 750ca6d02..fd8344009 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -15,6 +15,7 @@ from exo.inference.shard import Shard from exo.inference.torch.models.llm_utils import MultiLayerPreceptron, RMSNorm, get_torch_dtype +from exo.helpers import DEBUG class ShardTransformerDecoder(ttm.TransformerDecoder): @@ -70,9 +71,7 @@ def setup_caches( else: self.decoder_max_cache_seq_len = self.max_seq_len - print(f"decoder max: {self.decoder_max_cache_seq_len}") for layer in self.layers: - print(f"setting cache for {layer} if not none") if layer is not None: layer.setup_caches( batch_size, @@ -257,14 +256,12 @@ def __init__( self, config: dict, shard: Shard, - tokenizer: Any, device: Optional[torch.device] = None, max_new_tokens: int = 2048, use_cache: Optional[bool] = False ): super(ShardedLlamaModel, self).__init__() - self.tokenizer = tokenizer self.shard = shard self.config = config self.dtype = get_torch_dtype(self.config["torch_dtype"]) if "torch_dtype" in self.config else torch.float @@ -272,6 +269,9 @@ def __init__( self.max_new_tokens = max_new_tokens self.max_seq_len 
= self.config["max_seq_len"] + # pad_id maually set as same in all llama models + self.pad_id = 128004 # from <|finetune_right_pad_id|> + if use_cache: self.use_cache = use_cache else: @@ -280,12 +280,13 @@ def __init__( self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) print(f"model loaded: {self.model}\n") + print(f"device: {self.device}\n") def generate( self, tokens: torch.Tensor, hidden_state: Optional[torch.Tensor] = None - ) -> Tuple[torch.Tensor, Optional[torch.Tensor], bool]: + ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor]]: """ Generate logits and/or hidden_states from llama model @@ -294,12 +295,13 @@ def generate( hidden_state (torch.Tensor, optional) - hidden state from last activated hidden layer, if any """ if tokens.ndim == 1: - tokens = tokens.view(1, -1) + tokens = tokens.view(1, -1).to(device=self.device) bsz, tokens_length = tokens.size() + total_response_length = tokens_length + self.max_seq_len + # setup cache - print(self.model) if not self.model.caches_are_enabled() and self.use_cache: with self.device: print("setting up cache") @@ -312,14 +314,13 @@ def generate( if not self.shard.is_last_layer(): self.model.output_hidden_states = [self.shard.end_layer] - total_response_length = tokens_length + self.max_seq_len resp_max_seq_len = total_response_length if not self.model.caches_are_enabled() else self.model.decoder_max_cache_seq_len # clone tokens - generated_tokens = tokens.clone() + generated_tokens = tokens.clone().to(device=self.device) # masking for proper attention - padding_masks = generated_tokens != self.tokenizer.pad_id + padding_masks = generated_tokens != self.pad_id if not padding_masks.all(): padding_masks = torch.nn.functional.pad(padding_masks, (0, self.max_seq_len), value=True) @@ -332,11 +333,15 @@ def generate( total_response_length, resp_max_seq_len if resp_max_seq_len is not None else total_response_length, dtype=torch.bool, - device=tokens.device, + device=self.device, ) ).unsqueeze(0) - input_pos = torch.arange(0, total_response_length, device=generated_tokens.device).unsqueeze(0) + input_pos = torch.arange( + 0, + total_response_length, + device=self.device + ).unsqueeze(0) if self.model.caches_are_enabled(): curr_masks = masks[:, :tokens_length] @@ -345,6 +350,12 @@ def generate( input_pos = input_pos[:, :tokens_length].squeeze() + if DEBUG >= 4: + print("model_input") + print(f"tokens: {tokens} - {tokens.device}") + print(f"mask: {curr_masks} - {curr_masks.device}") + print(f"input_pos: {input_pos} - {input_pos.device}") + if hidden_state is not None: model_output = self.model( tokens=hidden_state, @@ -358,27 +369,8 @@ def generate( input_pos=input_pos, ) - print(f"\nmodel_output: {model_output}") - - # stop token - stop_tokens = None - - stop_token_reached = torch.zeros( - bsz, - dtype=torch.bool, - device=tokens.device - ) - stop_tokens = ( - torch.tensor( - stop_tokens, - device=tokens.device, - dtype=tokens.dtype - ) - if stop_tokens - else None - ) - - finished = False + if DEBUG >= 4: + print(f"model_output\n{model_output}") if isinstance(model_output, list): model_logits = model_output[1] @@ -388,11 +380,4 @@ def generate( model_logits = model_output model_hs = None - if stop_tokens is not None: - stop_token_reached = ttg._generation.update_stop_tokens_tracker( - tokens, stop_tokens, stop_token_reached - ) - - finished = True if stop_token_reached.all() else False - - return model_hs, model_logits, finished + return model_hs, model_logits diff --git a/exo/inference/torch/pt_inference.py 
b/exo/inference/torch/pt_inference.py index ba35b4f31..3818dc268 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -3,19 +3,18 @@ Sharded inference engine using PyTorch based torchtune models """ import os -from typing import Optional, Tuple, Union, List import functools from concurrent.futures import ThreadPoolExecutor import numpy as np import asyncio import torch - -from torchtune.models import llama3 +from torchtune.generation import sample as tt_sample from exo.inference.inference_engine import InferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader from exo.inference.shard import Shard +from exo.inference.tokenizers import _resolve_tokenizer from exo.helpers import DEBUG from exo.inference.torch.models.llm_utils import ( load_model_config, @@ -29,11 +28,12 @@ TOP_K = 25 class TorchDynamicShardInferenceEngine(InferenceEngine): - def __init__(self, shard_downloader: HFShardDownloader, model_id: str="llama"): + def __init__(self, shard_downloader: HFShardDownloader): self.shard = None self.shard_downloader = shard_downloader - self.model_id = model_id - self.supported_models = ["llama"] + self.request_id = None + self.executor = ThreadPoolExecutor(max_workers=1) + self.past_tokens = [] # device settings if os.environ.get("TORCH_DEVICE"): @@ -45,88 +45,105 @@ def __init__(self, shard_downloader: HFShardDownloader, model_id: str="llama"): else: self.device = torch.device("cpu") - async def infer_prompt( - self, - request_id: str, - shard: Shard, - prompt: str, - image_str: Optional[str] = None, - inference_state: Optional[str] = None - ) -> Tuple[np.ndarray, str, bool]: + async def encode(self, shard: Shard, prompt: str) -> np.ndarray: if DEBUG >= 4: - print("infer_prompt called") - print(f"prompt: {prompt}") - print(f"shard: {shard}") - print(f"inference_state: {inference_state}") - # ensure shard + print("encode called") + print(f"shard: {shard}\nprompt: {prompt}") + await self.ensure_shard(shard) - # tokenize - prompt_encode = self.tokenizer.encode(prompt, add_bos=True, add_eos=True) - print(f"prompt_encode {prompt_encode}") - tokens = torch.tensor( - prompt_encode, - dtype=torch.int - ) - print(f"tokens: {tokens}") - hidden_states = None - - # generate - loop = asyncio.get_running_loop() - with ThreadPoolExecutor() as pool: - result = await loop.run_in_executor( - pool, - functools.partial( - self.sharded_model.generate, - tokens=tokens - ) + tokens = await asyncio.get_event_loop().run_in_executor( + self.executor, + functools.partial( + self.tokenizer.encode, + prompt, + return_tensors="np" ) + ) + + if DEBUG >= 4: + print(f"tokens: {tokens}") - print(f"thread result: {result}") + self.past_tokens = tokens.tolist() - hidden_states, logits, finished = result[0], result[1], result[2] + return tokens - if hidden_states is not None: - return hidden_states.numpy(force=True), "", finished - else: - return logits.numpy(force=True), "", finished + async def decode(self, shard: Shard, tokens: np.ndarray) -> str: + await self.ensure_shard(shard) + return await asyncio.get_running_loop().run_in_executor( + self.executor, + functools.partial( + self.tokenizer.decode, + tokens.tolist() + ) + ) + + async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: + logits = x[:, -1] + def sample_wrapper(): + return tt_sample( + torch.tensor(logits), + temperature=temp, + top_k=top_k + ).numpy(force=True) + + return await asyncio.get_running_loop().run_in_executor( + self.executor, + functools.partial(sample_wrapper) + ) 
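+  # infer_tensor runs a single forward pass of the sharded model: shards that do not hold
+  # the last layer return the hidden state to pass on to the next node, while the final
+  # shard returns logits for sampling.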
async def infer_tensor( self, request_id: str, shard: Shard, input_data: np.ndarray, - inference_state: Optional[str] = None - ) -> Tuple[np.ndarray, str, bool]: + ) -> np.ndarray: # ensure shard await self.ensure_shard(shard) - return np.empty((1,1)), "", False + self.request_id = request_id if not self.request_id else self.request_id + + def infer_wrapper(): + model_hs, model_logits = self.sharded_model.generate( + tokens=torch.tensor(self.past_tokens).to(self.device), + hidden_state=torch.tensor(input_data).to(self.device) + ) + + if not shard.is_last_layer(): + if model_hs is not None: + return model_hs.numpy(force=True) + else: + raise ValueError("model hidden state returned None") + + if model_logits is not None: + return model_logits.numpy(force=True) + else: + raise ValueError("model logits returned None") + + return await asyncio.get_running_loop().run_in_executor(self.executor, infer_wrapper) async def ensure_shard(self, shard: Shard): if self.shard == shard: return - + # download model safetensors and shard - model_path = await self.shard_downloader.ensure_shard(shard) + model_path = await self.shard_downloader.ensure_shard( + shard, + self.__class__.__name__ + ) model_config = load_model_config(model_path / "config.json") - self.tokenizer = llama3.llama3_tokenizer( - path=f"{model_path}/original/tokenizer.model" - ) + self.tokenizer = await _resolve_tokenizer(model_path) - if self.model_id not in self.supported_models: - raise ValueError( - f"Model {self.model_id} not supported, only supported models are\n{self.supported_models}" + self.sharded_model = await asyncio.get_running_loop().run_in_executor( + self.executor, + functools.partial( + ShardedLlamaModel, + config=model_config, + shard=shard, + device=self.device, + use_cache=True ) - - self.sharded_model = ShardedLlamaModel( - model_config, - shard, - self.tokenizer, - self.device, - None, - use_cache=True ) # load sharded weights @@ -136,4 +153,8 @@ async def ensure_shard(self, shard: Shard): self.sharded_model ) - print("shard ensured\n") + if DEBUG >= 4: + print("shard ensured\n") + print(f"model_path: {model_path}") + print(f"shard: {shard}") + print(f"model: {self.sharded_model}") diff --git a/exo/inference/torch/tests/test_pt_inference_engine.py b/exo/inference/torch/tests/test_pt_inference_engine.py index e430989ad..af301980d 100644 --- a/exo/inference/torch/tests/test_pt_inference_engine.py +++ b/exo/inference/torch/tests/test_pt_inference_engine.py @@ -1,7 +1,7 @@ """ Test inference engine and model sharding """ -import time +import pytest import asyncio from exo.inference.shard import Shard @@ -11,42 +11,30 @@ import numpy as np -async def test_inference_engine( - inference_engine_1: InferenceEngine, - inference_engine_2: InferenceEngine, - model_id: str, - n_layers: int): - +@pytest.mark.asyncio +async def test_inference_engine(): prompt = "In a single word only, what is the last name of the current president of the USA?" 
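# A sketch of the shard layout this test exercises: a Shard spans the inclusive
# layer range [start_layer, end_layer] out of n_layers, so llama-3.2-1b on a single
# node is (0, 15, 16) and a two-node split could be (0, 7) and (8, 15). The helper
# below is hypothetical, for illustration only; it is not part of the repo.
def shards_cover_model(shards, n_layers: int) -> bool:
  # sort by starting layer and check the ranges tile 0..n_layers-1 with no gaps or overlaps
  ranges = sorted((s.start_layer, s.end_layer) for s in shards)
  expected_start = 0
  for start, end in ranges:
    if start != expected_start or end < start:
      return False
    expected_start = end + 1
  return expected_start == n_layers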
shard = Shard( - model_id=model_id, + model_id="llama-3.2-1b", start_layer=0, end_layer=0, - n_layers=n_layers + n_layers=16 ) - resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( - "A", - shard=shard, - prompt=prompt - ) + inference_engine = TorchDynamicShardInferenceEngine(HFShardDownloader()) - print("\n------------resp_full---------------\n") - print(resp_full) - print("\n------------resp_full---------------\n") + output = await inference_engine.infer_prompt("test_id", shard, prompt) + print("\n------------inference_engine output---------------\n") + print(output) + print("\n---------------------------\n") - time.sleep(5) + assert isinstance(output, np.ndarray), "Output should be numpy array" if __name__ == '__main__': try: - print("\n\n -------- TEST meta-llama/Llama-3.2-1B-Instruct -------- \n\n") - asyncio.run(test_inference_engine( - TorchDynamicShardInferenceEngine(HFShardDownloader()), - TorchDynamicShardInferenceEngine(HFShardDownloader()), - "meta-llama/Llama-3.2-1B-Instruct", - 16 - )) + print("\n\n -------- TEST unsloth/Llama-3.2-1B-Instruct -------- \n\n") + asyncio.run(test_inference_engine()) except Exception as err: print(f"\n!!!! LLAMA TEST FAILED \n{err}\n") diff --git a/exo/models.py b/exo/models.py index b1b39e666..60525357b 100644 --- a/exo/models.py +++ b/exo/models.py @@ -8,8 +8,8 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-1B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct", + "TorchDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct" }, - "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Llama-3.2-1B-Instruct", start_layer=0, end_layer=0, n_layers=16), }, "llama-3.2-3b": { "layers": 28, @@ -17,7 +17,6 @@ "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-3B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", }, - "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Llama-3.2-3B-Instruct", start_layer=0, end_layer=0, n_layers=28), }, "llama-3.1-8b": { "layers": 32, @@ -25,7 +24,6 @@ "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", }, - "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Meta-Llama-3.1-8B-Instruct", start_layer=0, end_layer=0, n_layers=32), }, "llama-3.1-70b": { "layers": 80, @@ -33,7 +31,6 @@ "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "NousResearch/Meta-Llama-3.1-70B-Instruct", }, - "TorchDynamicShardInferenceEngine": Shard(model_id="unsloth/Meta-Llama-3.1-70B-Instruct", start_layer=0, end_layer=0, n_layers=80), }, "llama-3.1-70b-bf16": { "layers": 80, @@ -48,7 +45,6 @@ "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3-8B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", }, - "TorchDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3-8B-Instruct", start_layer=0, end_layer=0, n_layers=32), }, "llama-3-70b": { "layers": 80, diff --git a/setup.py b/setup.py index 84532ff86..32328864b 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,9 @@ "torch==2.4.0", "accelerate==0.34.2", "torchtune==0.4.0", - "torchao==0.6.1" + "torchao==0.6.1", + "pytest==8.3.3", + "pytest-asyncio==0.24.0" ] extras_require = { From fbf106ec6cb6becbc7b823bd5971ad9dee84f17e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 
24 Nov 2024 05:01:58 -0900 Subject: [PATCH 496/589] removing last shard check for return of hidden state from infer_prompt --- exo/inference/torch/models/llama3.py | 2 +- exo/inference/torch/pt_inference.py | 14 ++++---------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index fd8344009..01c15fe78 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -286,7 +286,7 @@ def generate( self, tokens: torch.Tensor, hidden_state: Optional[torch.Tensor] = None - ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor]]: + ) -> Tuple[Optional[torch.Tensor], torch.Tensor]: """ Generate logits and/or hidden_states from llama model diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 3818dc268..cbd4f1758 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -109,16 +109,10 @@ def infer_wrapper(): hidden_state=torch.tensor(input_data).to(self.device) ) - if not shard.is_last_layer(): - if model_hs is not None: - return model_hs.numpy(force=True) - else: - raise ValueError("model hidden state returned None") - - if model_logits is not None: - return model_logits.numpy(force=True) - else: - raise ValueError("model logits returned None") + if model_hs is not None: + return model_hs.numpy(force=True) + + return model_logits.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, infer_wrapper) From 405b5ae9b6db97916edcfeeabf3cc6f90a0f84c7 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 05:39:37 -0900 Subject: [PATCH 497/589] fixing vram/ram issue, switched to using float16 for dtype --- exo/inference/torch/models/llama3.py | 12 +++++++++--- .../torch/tests/test_pt_inference_engine.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 01c15fe78..ba1b13ed8 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -101,6 +101,11 @@ def forward( input_pos: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor, List[torch.Tensor]]: # Determine the type of input and shape + if DEBUG >= 4: + print("forward called") + print(f"tokens: {tokens}") + print(f"mask: {mask}") + if tokens.ndim == 3: h = tokens # Use directly as hidden states else: @@ -257,14 +262,14 @@ def __init__( config: dict, shard: Shard, device: Optional[torch.device] = None, - max_new_tokens: int = 2048, + max_new_tokens: int = 10, use_cache: Optional[bool] = False ): super(ShardedLlamaModel, self).__init__() self.shard = shard self.config = config - self.dtype = get_torch_dtype(self.config["torch_dtype"]) if "torch_dtype" in self.config else torch.float + self.dtype = torch.float16 self.device = device if device is not None else torch.device("cpu") self.max_new_tokens = max_new_tokens self.max_seq_len = self.config["max_seq_len"] @@ -277,7 +282,8 @@ def __init__( else: self.config.get("use_cache", False) - self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) + with torch.no_grad(): + self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) print(f"model loaded: {self.model}\n") print(f"device: {self.device}\n") diff --git a/exo/inference/torch/tests/test_pt_inference_engine.py b/exo/inference/torch/tests/test_pt_inference_engine.py index af301980d..abd5f9ea3 100644 --- 
a/exo/inference/torch/tests/test_pt_inference_engine.py +++ b/exo/inference/torch/tests/test_pt_inference_engine.py @@ -18,7 +18,7 @@ async def test_inference_engine(): shard = Shard( model_id="llama-3.2-1b", start_layer=0, - end_layer=0, + end_layer=15, n_layers=16 ) From 0320c50252641770857dbd5524191ad937aba079 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 05:44:27 -0900 Subject: [PATCH 498/589] trying to offload as I can --- exo/inference/torch/pt_inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index cbd4f1758..0a8d679e1 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -110,9 +110,9 @@ def infer_wrapper(): ) if model_hs is not None: - return model_hs.numpy(force=True) + return model_hs.cpu().numpy(force=True) - return model_logits.numpy(force=True) + return model_logits.cpu().numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, infer_wrapper) From 84f41314fe8ba4f9e23f9f1f48be9da346efa008 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 05:47:55 -0900 Subject: [PATCH 499/589] adding detach --- exo/inference/torch/pt_inference.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 0a8d679e1..e4f0ec755 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -110,9 +110,11 @@ def infer_wrapper(): ) if model_hs is not None: - return model_hs.cpu().numpy(force=True) + model_hs = model_hs.detach().cpu() + return model_hs.numpy(force=True) - return model_logits.cpu().numpy(force=True) + model_logits = model_logits.detach().cpu() + return model_logits.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, infer_wrapper) From 596c715e91eaf909f0d226d24388028f9a87ae9e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 05:55:38 -0900 Subject: [PATCH 500/589] debug process issue --- exo/inference/torch/models/llama3.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index ba1b13ed8..cf1c23cc2 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -310,7 +310,6 @@ def generate( # setup cache if not self.model.caches_are_enabled() and self.use_cache: with self.device: - print("setting up cache") self.model.setup_caches( bsz, self.dtype, @@ -363,8 +362,10 @@ def generate( print(f"input_pos: {input_pos} - {input_pos.device}") if hidden_state is not None: + print(f"hidden_state: {hidden_state} - {hidden_state.device}") model_output = self.model( - tokens=hidden_state, + tokens=tokens, + hidden_state=hidden_state, mask=curr_masks, input_pos=input_pos, ) From a5eb1beb01becc65a5c4ee915534b9294237808b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 05:56:37 -0900 Subject: [PATCH 501/589] putting back hidden state pass --- exo/inference/torch/models/llama3.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index cf1c23cc2..783e20b9a 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -364,8 +364,7 @@ def generate( if hidden_state is not None: print(f"hidden_state: {hidden_state} - {hidden_state.device}") 
model_output = self.model( - tokens=tokens, - hidden_state=hidden_state, + tokens=hidden_state, mask=curr_masks, input_pos=input_pos, ) From 21e626e0a703a455f50667f25dee0352025d2194 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 13:50:59 -0900 Subject: [PATCH 502/589] fixing torch inference engine selection not working when adding more nodes --- build/lib/exo/inference/inference_engine.py | 2 +- exo/orchestration/standard_node.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/build/lib/exo/inference/inference_engine.py b/build/lib/exo/inference/inference_engine.py index 2b98adbe8..c5dfc0e30 100644 --- a/build/lib/exo/inference/inference_engine.py +++ b/build/lib/exo/inference/inference_engine.py @@ -27,7 +27,7 @@ def get_inference_engine(inference_engine_name: str, shard_downloader: 'ShardDow tinygrad.helpers.DEBUG.value = int(os.getenv("TINYGRAD_DEBUG", default="0")) return TinygradDynamicShardInferenceEngine(shard_downloader) - elif inference_engine_name == "pytorch": + elif inference_engine_name == "torch": from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine return PyTorchDynamicShardInferenceEngine(shard_downloader) else: diff --git a/exo/orchestration/standard_node.py b/exo/orchestration/standard_node.py index 1094a2a90..357c8fb14 100644 --- a/exo/orchestration/standard_node.py +++ b/exo/orchestration/standard_node.py @@ -91,6 +91,9 @@ def get_supported_inference_engines(self): if self.inference_engine.__class__.__name__ == 'MLXDynamicShardInferenceEngine': supported_engine_names.append('mlx') supported_engine_names.append('tinygrad') + elif self.get_inference_engine.__class__.__name__ == 'TorchDynamicShardInferenceEngine': + supported_engine_names.append('torch') + supported_engine_names.append('tinygrad') else: supported_engine_names.append('tinygrad') return supported_engine_names From e8f689cb66e142122d6e28490c9ef2413864b981 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 13:53:06 -0900 Subject: [PATCH 503/589] fixing typo --- exo/orchestration/standard_node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/orchestration/standard_node.py b/exo/orchestration/standard_node.py index 357c8fb14..e08488da8 100644 --- a/exo/orchestration/standard_node.py +++ b/exo/orchestration/standard_node.py @@ -91,7 +91,7 @@ def get_supported_inference_engines(self): if self.inference_engine.__class__.__name__ == 'MLXDynamicShardInferenceEngine': supported_engine_names.append('mlx') supported_engine_names.append('tinygrad') - elif self.get_inference_engine.__class__.__name__ == 'TorchDynamicShardInferenceEngine': + elif self.inference_engine.__class__.__name__ == 'TorchDynamicShardInferenceEngine': supported_engine_names.append('torch') supported_engine_names.append('tinygrad') else: From d0cc3b017c73f040e5a8b6cfcf7ac7b335003d3b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 14:43:22 -0900 Subject: [PATCH 504/589] fixing llama3 tests, removing mask and input_ids for going through model layers as that will not be passed to other nodes --- exo/inference/inference_engine.py | 5 +- exo/inference/torch/hf_inference.py | 377 ------------------ exo/inference/torch/models/llama3.py | 8 +- .../torch/tests/test_hf_inference_engine.py | 141 ------- exo/inference/torch/tests/test_llama3_full.py | 9 +- .../torch/tests/test_llama3_split.py | 18 +- exo/inference/torch/tests/test_split_model.py | 214 ---------- 7 files changed, 18 insertions(+), 754 deletions(-) delete mode 100644 
exo/inference/torch/hf_inference.py delete mode 100644 exo/inference/torch/tests/test_hf_inference_engine.py delete mode 100644 exo/inference/torch/tests/test_split_model.py diff --git a/exo/inference/inference_engine.py b/exo/inference/inference_engine.py index 9a0c2a091..31e37d117 100644 --- a/exo/inference/inference_engine.py +++ b/exo/inference/inference_engine.py @@ -34,6 +34,7 @@ async def infer_prompt(self, request_id: str, shard: Shard, prompt: str) -> np.n "mlx": "MLXDynamicShardInferenceEngine", "tinygrad": "TinygradDynamicShardInferenceEngine", "dummy": "DummyInferenceEngine", + "torch": "TorchDynamicShardInferenceEngine" } def get_inference_engine(inference_engine_name: str, shard_downloader: 'ShardDownloader'): @@ -51,10 +52,8 @@ def get_inference_engine(inference_engine_name: str, shard_downloader: 'ShardDow return TinygradDynamicShardInferenceEngine(shard_downloader) elif inference_engine_name == "torch": from exo.inference.torch.pt_inference import TorchDynamicShardInferenceEngine + return TorchDynamicShardInferenceEngine(shard_downloader) - elif inference_engine_name == "hf": - from exo.inference.torch.hf_inference import HFDynamicShardInferenceEngine - return HFDynamicShardInferenceEngine(shard_downloader) elif inference_engine_name == "dummy": from exo.inference.dummy_inference_engine import DummyInferenceEngine return DummyInferenceEngine() diff --git a/exo/inference/torch/hf_inference.py b/exo/inference/torch/hf_inference.py deleted file mode 100644 index 4912a0a2e..000000000 --- a/exo/inference/torch/hf_inference.py +++ /dev/null @@ -1,377 +0,0 @@ -""" -HFDynamicShardInferenceEngine -Sharded inference engine using PyTorch based HuggingFace transformers -""" -import asyncio -import os -import json -import functools -from concurrent.futures import ThreadPoolExecutor - -import numpy as np - -import torch - -from typing import Optional, Tuple, Union, List -from exo.inference.shard import Shard -from exo.inference.inference_engine import InferenceEngine -from exo.inference.torch.models.hf import ShardedHuggingFaceModel -from exo.inference.tokenizers import resolve_tokenizer -from exo.helpers import DEBUG -from exo.download.hf.hf_shard_download import HFShardDownloader -from exo.download.hf.hf_helpers import get_weight_map - -from transformers import Cache - -# model value options -TOP_K = 20 -TEMP = 0.6 -TOP_P = 0.9 - -class HFDynamicShardInferenceEngine(InferenceEngine): - def __init__(self, shard_downloader: HFShardDownloader): - """ - Initialize the inference engine. 
- - Args: - shard_downloader: Model and weights sharding download - """ - self.shard = None - self.shard_downloader = shard_downloader - - # the whole history with new logits need to - # be passed to the model to reach the end token - # even with caching - self.past_input_ids = None - - # setup cuda device - if os.environ.get("TORCH_DEVICE"): - self.device = torch.device(os.environ["TORCH_DEVICE"]) - elif torch.cuda.is_available(): - self.device = torch.device("cuda") - elif torch.backends.mps.is_available() and torch.backends.mps.is_built(): - self.device = torch.device("mps") - else: - self.device = torch.device("cpu") - - torch.set_default_device(self.device) - - # setup cude dtype - self.dtype = torch.get_default_dtype() - - # setup device_map - if os.environ.get("TORCH_DEVICE_MAP"): - self.device_map = os.environ["TORCH_DEVICE_MAP"] - else: - self.device_map = str(self.device) - - def infer_caching( - self, - inference_state: Optional[str] = None - ) -> Tuple[Optional[torch.Tensor], Optional[dict]]: - """ - inference caching from inference_state json - """ - # setup cache and cached input_ids - past_iids = None - cached_iids = None - if inference_state is not None: - try: - infer_state = json.loads(inference_state) - except ValueError: - infer_state = None - - if infer_state is not None: - cached_iids = infer_state["cached_iids"] - if cached_iids is not None: - past_iids = None - if len(cached_iids) > 0: - past_iids = torch.tensor(cached_iids["input_ids"]).to(self.device) - cached_iids = {"input_ids": past_iids.tolist()} - - if DEBUG >= 4: - print(f"cached_iids len: {len(cached_iids)}") - print(f"cached_iids: {cached_iids}") - - return (past_iids, cached_iids) - - async def async_forward( - self, - input_ids: Optional[torch.Tensor] = None, - hidden_states: Optional[torch.Tensor] = None, - attention_mask: Optional[torch.Tensor] = None - ) -> Tuple[Optional[torch.Tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.Tensor]]: - """ - Asynchronously performs the forward pass using a stateful sharded model. - - Args: - input_ids (torch.Tensor, optional): Input token IDs for the model. If not provided, `hidden_states` must be used. - hidden_states (torch.Tensor, optional): Precomputed hidden states to be used instead of `input_ids`. - attention_mask (torch.Tensor, optional): Mask to prevent attention on padding token indices. - - Returns: - A tuple containing: - - - shard_hidden_states (torch.Tensor, optional): Hidden states resulting from the forward pass. - - shard_past_kvs (list(torch.FloatTensor), optional): List of past key-value tensors (cache) used in the model. - - shard_logits (torch.Tensor, optional): The logits computed during the forward pass. - """ - loop = asyncio.get_running_loop() - - with ThreadPoolExecutor() as pool: - result = await loop.run_in_executor(pool, functools.partial( - self.stateful_sharded_model.forward, - input_ids=input_ids, - hidden_states=hidden_states, - attention_mask=attention_mask - )) - - if DEBUG >=4: - print("async_forward") - print(f"result: {result}") - - return result[0], result[1], result[2] - - async def async_logit_sample( - self, - logits: torch.Tensor - ) -> torch.Tensor: - """ - Asynchronously samples logits using the model's logit sampling method. - - Args: - logits (torch.Tensor): The logits produced by the model for sampling. 
- - Returns: - next_logit (torch.Tensor): The next logit samples from given logis - """ - loop = asyncio.get_running_loop() - - with ThreadPoolExecutor() as pool: - result = await loop.run_in_executor(pool, functools.partial( - self.stateful_sharded_model.logits_sample, - logits=logits - )) - - return result - - async def infer_prompt( - self, - request_id: str, - shard: Shard, - prompt: str, - image_str: Optional[str] = None, - inference_state: Optional[str] = None - ) -> Tuple[np.ndarray, str, bool]: - """ - Asynchronously processes a prompt using the specified shard and returns the inference result. - - Args: - request_id (str): The unique identifier for the request. - shard (Shard): The model shard used for inference. - prompt (str): The text prompt to be processed by the model. - image_str (str, optional): A base64 encoded image string to be optionally used in the inference. Defaults to None. - inference_state (str, optional): The cached inference state for resuming or continuing inference. Defaults to None. - - Returns: - A tuple containing: - - - input_ids (np.ndarray): The processed token IDs as a NumPy array if logits were generated. Otherwise, it returns hidden states. - - cache_json (str): A JSON string containing the cached input IDs for further inference steps. - - is_finished (bool): A boolean indicating whether the model has reached the end-of-sequence (EOS) token. - """ - if DEBUG >= 4: - print("infer_prompt called") - print(f"prompt: {prompt}") - print(f"shard: {shard}") - print(f"inference_state: {inference_state}") - - await self.ensure_shard(shard) - - inputs = self.tokenizer([prompt], return_tensors="pt") - input_ids = inputs.input_ids.to(self.device) - input_attention_mask = inputs.attention_mask.to(self.device) - - # get cache from inference_state - past_iids, cached_iids = self.infer_caching(inference_state) - - if past_iids is not None: - self.past_input_ids = past_iids - else: - self.past_input_ids = input_ids - - if DEBUG >= 4: - print(f"past_input_ids: {self.past_input_ids}\n") - - shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( - input_ids=self.past_input_ids, - attention_mask=input_attention_mask - ) - - if DEBUG >= 4: - print(f"\nshard_hidden_states: {shard_hidden_states}\n") - print(f"\nshard_past_kvs {shard_past_kvs}\n") - print(f"\nshard_logits: {shard_logits}") - - next_token = None - if shard_logits is not None: - next_token = await self.async_logit_sample(shard_logits) - self.past_input_ids = torch.cat([input_ids, next_token[:, None].squeeze(-1)], dim=-1) - input_ids = next_token - - if DEBUG >= 4: - print(f"\nnext_token: {next_token}") - - if self.past_input_ids is not None: - cached_iids = {"input_ids": self.past_input_ids.tolist()} - - is_finished = False - if next_token is not None: - is_finished = next_token.item() == self.tokenizer.eos_token_id - - return_values = ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps({"cached_iids": cached_iids}), - is_finished - ) - - if DEBUG >= 4: - print(f"return_values: {return_values}") - - return return_values - - async def infer_tensor( - self, - request_id: str, - shard: Shard, - input_data: np.ndarray, - inference_state: Optional[str] = None - ) -> Tuple[np.ndarray, str, bool]: - """ - Asynchronously processes input tensor data using the specified shard and returns the inference result. - - Args: - request_id (str): The unique identifier for the request. - shard (Shard): The model shard used for inference. 
- input_data (np.ndarray): The input data in NumPy array format to be processed by the model. - inference_state (str, optional): The cached inference state for resuming or continuing inference. Defaults to None. - - Returns: - A tuple containing: - - - input_ids (np.ndarray): The processed token IDs as a NumPy array if logits were generated. Otherwise, it returns hidden states. - - cache_json (str): A JSON string containing the cached input IDs for further inference steps. - - is_finished (bool): A boolean indicating whether the model has reached the end-of-sequence (EOS) token. - """ - if DEBUG >= 4: - print("infer_tensor called") - print(f"input_data: {input_data}") - print(f"shard: {shard}") - print(f"inference_state: {inference_state}") - - await self.ensure_shard(shard) - - input_ids = torch.tensor(input_data).to(self.device) - - # get cache from inference_state - past_iids, cached_iids = self.infer_caching(inference_state) - - # detect if hidden_states or not - hidden_states = None - self.past_input_ids = None - if input_ids.size()[-1] > 1: - hidden_states = input_ids - self.past_input_ids = past_iids - else: - if past_iids is not None: - self.past_input_ids = past_iids - else: - self.past_input_ids = input_ids - - if DEBUG >= 4: - print(f"\npast_input_ids: {self.past_input_ids}") - print(f"\nhidden_state: {hidden_states}") - print(f"\ninference_state: {inference_state}") - - shard_hidden_states, shard_past_kvs, shard_logits = await self.async_forward( - input_ids=self.past_input_ids, - hidden_states=hidden_states - ) - - next_token = None - if shard_logits is not None: - next_token = await self.async_logit_sample(shard_logits) - input_ids = next_token - - #cache - next_cached_logits = None - if next_token is not None: - if self.past_input_ids is not None: - next_cached_logits = torch.cat([self.past_input_ids, next_token], dim=-1).to(self.device) - elif past_iids is not None: - next_cached_logits = torch.cat([past_iids, next_token], dim=-1).to(self.device) - - cached_iids = { - "input_ids": next_cached_logits.tolist() if next_cached_logits is not None else [] - } - - is_finished = False - if next_token is not None: - is_finished = next_token.item() == self.tokenizer.eos_token_id - - if is_finished: - # clear cache - cached_iids = {"input_ids": []} - - if DEBUG >= 4: - print(f"\ninput_ids: {input_ids}") - print(f"\nshard_hidden_states: {shard_hidden_states}\n") - print(f"\nshard_past_kvs {shard_past_kvs}\n") - print(f"\nshard_logits: {shard_logits}") - - return_values = ( - input_ids.numpy(force=True) if shard_logits is not None else shard_hidden_states.numpy(force=True), - json.dumps({"cached_iids": cached_iids}), - is_finished - ) - - if DEBUG >= 4: - print(f"return_values: {return_values}") - - return return_values - - async def ensure_shard(self, shard: Shard): - """ - Ensure the model shard is loaded and ready for inference. - - Args: - shard (Optional[Shard]): Shard information for the model. 
- """ - if self.shard == shard: - return - - if DEBUG >= 4: - print(f"Loading new shard: {shard}") - - model_path = await self.shard_downloader.ensure_shard(shard) - - # get model weight map - model_wm = await get_weight_map(repo_id=shard.model_id) - - self.stateful_sharded_model = ShardedHuggingFaceModel( - shard=shard, - local_model_path=model_path, - weight_map=model_wm, - device=self.device, - dtype=self.dtype, - device_map=self.device_map, - top_k=TOP_K, - temp=TEMP, - top_p=TOP_P - ) - self.shard = shard - - self.tokenizer = await resolve_tokenizer(shard.model_id) - - if DEBUG >= 4: - print(f"Shard loaded successfully: {shard}") diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 783e20b9a..08c244edf 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -134,10 +134,10 @@ def forward( with torch.no_grad(): h = layer( h, - mask=mask, - encoder_input=encoder_input, - encoder_mask=encoder_mask, - input_pos=input_pos, + # mask=mask, + # encoder_input=encoder_input, + # encoder_mask=encoder_mask, + # input_pos=input_pos, ) if i in self.output_hidden_states: diff --git a/exo/inference/torch/tests/test_hf_inference_engine.py b/exo/inference/torch/tests/test_hf_inference_engine.py deleted file mode 100644 index c7230c894..000000000 --- a/exo/inference/torch/tests/test_hf_inference_engine.py +++ /dev/null @@ -1,141 +0,0 @@ -""" -Test inference engine and model sharding -""" -import time -import asyncio - -from exo.inference.shard import Shard -from exo.inference.torch.hf_inference import HFDynamicShardInferenceEngine -from exo.download.hf.hf_shard_download import HFShardDownloader -from exo.inference.inference_engine import InferenceEngine - -import numpy as np - -async def test_inference_engine( - inference_engine_1: InferenceEngine, - inference_engine_2: InferenceEngine, - model_id: str, - n_layers: int): - - prompt = "In a single word only, what is the last name of the current president of the USA?" 
- - shard = Shard( - model_id=model_id, - start_layer=0, - end_layer=0, - n_layers=n_layers - ) - - resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( - "A", - shard=shard, - prompt=prompt - ) - - print("\n------------resp_full---------------\n") - print(resp_full) - print("\n------------resp_full---------------\n") - - time.sleep(5) - - next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( - "A", - shard=shard, - input_data=resp_full, - inference_state=inference_state_full, - ) - - print("\n------------next_resp_full---------------\n") - print(next_resp_full) - print("\n------------next_resp_full---------------\n") - - time.sleep(5) - - half_layer = int(n_layers/2) - - resp_shard = Shard( - model_id=model_id, - start_layer=0, - end_layer=half_layer, - n_layers=n_layers - ) - - resp_shard2 = Shard( - model_id=model_id, - start_layer=half_layer+1, - end_layer=n_layers-1, - n_layers=n_layers - ) - - resp1, inference_state_1, _ = await inference_engine_1.infer_prompt( - "B", - shard=resp_shard, - prompt=prompt - ) - - print("\n------------resp1---------------\n") - print(resp1) - print("\n------------resp1---------------\n") - - time.sleep(5) - - resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( - "B", - shard=resp_shard2, - input_data=resp1, - inference_state=inference_state_1, - ) - - print("\n------------resp2---------------\n") - print(resp2) - print("\n------------resp2---------------\n") - - resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( - "B", - shard=resp_shard, - input_data=resp2, - inference_state=inference_state_2, - ) - - print("\n------------resp3---------------\n") - print(resp3) - print("\n------------resp3---------------\n") - - resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( - "B", - shard=resp_shard2, - input_data=resp3, - inference_state=inference_state_3, - ) - - print("\n------------resp4---------------\n") - print(resp4) - print("\n------------resp4---------------\n") - - assert np.array_equal(resp_full, resp2) - assert np.array_equal(next_resp_full, resp4) - -if __name__ == '__main__': - try: - print("\n\n -------- TEST Qwen/Qwen2-0.5B-Instruct -------- \n\n") - asyncio.run(test_inference_engine( - HFDynamicShardInferenceEngine(HFShardDownloader()), - HFDynamicShardInferenceEngine(HFShardDownloader()), - "Qwen/Qwen2-0.5B-Instruct", - 36 - )) - except Exception as err: - print(f"\n!!!! QWEN2 TEST FAILED \n{err}\n") - - #try: - # print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") - # asyncio.run(test_inference_engine( - # TorchDynamicShardInferenceEngine(HFShardDownloader()), - # TorchDynamicShardInferenceEngine(HFShardDownloader()), - # "unsloth/Meta-Llama-3.1-8B-Instruct", - # 32 - # )) - #except Exception as err: - # print(f"\n!!!! 
unsloth/Meta-Llama-3.1-8B-Instruct TEST FAILED \n{err}\n") - - diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index 7ffb4dce4..860a6855d 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -114,11 +114,10 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" # device = torch.device("cuda") device = None shard_model_1 = ShardedLlamaModel( - config, - shard_1, - llama_tokenizer, - device, - MAX_NEW_TOKENS, + config=config, + shard=shard_1, + device=None, + max_new_tokens=MAX_NEW_TOKENS, use_cache=True ) print(f"\nshard_model_1: {shard_model_1}") diff --git a/exo/inference/torch/tests/test_llama3_split.py b/exo/inference/torch/tests/test_llama3_split.py index 682727654..e9f596a8c 100644 --- a/exo/inference/torch/tests/test_llama3_split.py +++ b/exo/inference/torch/tests/test_llama3_split.py @@ -110,11 +110,10 @@ def test_generation_2(shard_model, in_tokens, hidden_state): # Initialize LlamaModel with config and tokenizer shard_model_1 = ShardedLlamaModel( - config, - shard_1, - llama_tokenizer, - None, - MAX_NEW_TOKENS, + config=config, + shard=shard_1, + device=None, + max_new_tokens=MAX_NEW_TOKENS, use_cache=True ) print(f"\nshard_model_1: {shard_model_1}") @@ -122,11 +121,10 @@ def test_generation_2(shard_model, in_tokens, hidden_state): shard_1_hs, shard_1_tokens = test_generation_1(shard_model_1, prompt) shard_model_2 = ShardedLlamaModel( - config, - shard_2, - llama_tokenizer, - None, - MAX_NEW_TOKENS, + config=config, + shard=shard_2, + device=None, + max_new_tokens=MAX_NEW_TOKENS, use_cache=True ) print(f"\nshard_model_2: {shard_model_2}") diff --git a/exo/inference/torch/tests/test_split_model.py b/exo/inference/torch/tests/test_split_model.py deleted file mode 100644 index 197a7c07a..000000000 --- a/exo/inference/torch/tests/test_split_model.py +++ /dev/null @@ -1,214 +0,0 @@ -""" -Testing of loading model by layer -""" -import asyncio -import re -import json -import os -from pathlib import Path -from typing import Optional - -import torch - -from exo.download.hf.hf_helpers import get_weight_map -from exo.download.hf.hf_shard_download import HFShardDownloader -from exo.inference.shard import Shard -from exo.inference.torch.utils import print_cuda_vram_stats - -from transformers import AutoModelForCausalLM, AutoTokenizer - -def load_model( - shard: Shard, - model_path: Path, - weight_map: Optional[dict], - device: Optional[torch.device] = torch.device("cpu") -) -> Optional[AutoModelForCausalLM]: - """ - load model by layer and safetensors - return causal llm automodel with only requested layers, if weight maps - if no weight map, return and load the whole model - """ - print("load_model called") - model_st_snapshot = model_path/"model.safetensors.index.json" - - if os.environ.get("TORCH_DEVICE"): - device = torch.device(os.environ["TORCH_DEVICE"]) - elif torch.cuda.is_available(): - device = torch.device("cuda") - elif torch.backends.mps.is_available() and torch.backends.mps.is_built(): - device = torch.device("mps") - - torch.set_default_device(device) - - # setup cude dtype - dtype = torch.get_default_dtype() - - # setup device_map - if os.environ.get("TORCH_DEVICE_MAP"): - device_map = os.environ["TORCH_DEVICE_MAP"] - else: - device_map = str(device) - - if weight_map: - layer_weight_map = {} - non_layer_weights = [] - - for wname, wtensor in weight_map.items(): - # get layer number - layer_rgx = r'^model\.layers\.(\d+)\.*' - 
layer_found = re.findall(layer_rgx, wname) - print(f"wname: {wname}") - if layer_found: - print(f"layer_found: {layer_found}") - # slice up layer map to start and end layers - # from shard - layer_idx = int(layer_found[0]) - if shard.start_layer <= layer_idx <= shard.end_layer: - layer_weight_map[wname] = wtensor - else: - non_layer_weights.append((wname, wtensor)) - - non_layer_weights = sorted(non_layer_weights, key=lambda x: x[1]) - - print(f"sorted non_layer_weights: {non_layer_weights}") - - if shard.is_first_layer(): - # this assumes at max only one first weight non-layer for model - first_weight = non_layer_weights[0] - layer_weight_map[first_weight[0]] = first_weight[1] - elif shard.is_last_layer(): - last_weights = non_layer_weights[1:] - for last_weight in last_weights: - layer_weight_map[last_weight[0]] = last_weight[1] - - # rewrite model.safetensors.index.json - try: - mst_json = {} - with open(model_st_snapshot, "r") as mst_file: - mst_json = json.load(mst_file) - mst_json["weight_map"] = layer_weight_map - - print(f"mst_json: {json.dumps(mst_json, indent=4)}") - - os.remove(model_st_snapshot) - - with open(model_st_snapshot, "w") as mst_file: - json.dump(mst_json, mst_file, indent=4) - except Exception as err: - print(f"err: {err}") - raise - - else: - print("weight_map not found, loading whole model") - - # setup the weight range for init_weights - shard_num_hidden_layers = shard.end_layer - shard.start_layer - print(f"Setting up LLM config with {shard_num_hidden_layers} hidden layers") - - # load model with layer edits - # or whole model if no weight_map - print(f"Loading sharded AutoModelForCausalLM from {model_path}") - shard_model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path=model_path, - device_map=device_map, - torch_dtype=dtype, - offload_buffers=True, - local_files_only=True, - num_hidden_layers=shard_num_hidden_layers - ).to(device) - - print("Loading tokenizer") - tokenizer = AutoTokenizer.from_pretrained( - pretrained_model_name_or_path=model_path, - local_files_only=True, - ) - - if torch.cuda.is_available() and device == "cuda": - print_cuda_vram_stats() - - prompt = "In a single word only, what color is a red apple?" 
- - model_inputs = tokenizer( - [prompt], - return_tensors="pt" - ) - - generated_ids = shard_model.generate( - model_inputs.input_ids.to(device), - attention_mask=model_inputs.attention_mask.to(device), - max_new_tokens=512, - do_sample=True - ) - - generated_ids = [ - output_ids[len(input_ids):] for input_ids, output_ids in zip( - model_inputs.input_ids, - generated_ids - ) - ] - - response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] - - print(f"Prompt: {prompt}\n") - print(f"Response: {response}\n") - - # have to clear out edited model safetensors mst_json - os.remove(model_st_snapshot) - - return shard_model - -async def test_split_model( - model_id: str, - start_layer: int, - end_layer: int, - n_layers: int -): - """ - Test to load split models - """ - - shard = Shard( - model_id=model_id, - start_layer=start_layer, - end_layer=end_layer-1, - n_layers=n_layers - ) - - print(f"loading shard: {shard}") - shard_downloader = HFShardDownloader() - model_path = await shard_downloader.ensure_shard(shard) - weight_map = await get_weight_map(model_id) - - load_model( - shard, - model_path, - weight_map - ) - -if __name__ == "__main__": - n_layers = int(os.environ["N_LAYERS"]) if os.environ.get("N_LAYERS") else 32 - start_layer = int(os.environ["START_LAYER"]) if os.environ.get("START_LAYER") else 0 - end_layer = int(os.environ["END_LAYER"]) if os.environ.get("END_LAYER") else int(n_layers/2) - #Qwen/Qwen2.5-3B - #try: - # print("\n-------- Test Qwen/Qwen2.5-3B-Instruct ----------\n") - # asyncio.run(test_split_model( - # "Qwen/Qwen2.5-3B-Instruct", - # 0, - # 6, - # 36 - # )) - #except Exception as err: - # print(f"\n\n !!!!!!!!!!! Qwen/Qwen2.5-3B-Instruct TEST FAILED \n{err}\n") - - # unsloth/Meta-Llama-3.1-8B-Instruct - try: - print("\n-------- Test unsloth/Meta-Llama-3.1-8B-Instruct ----------\n") - asyncio.run(test_split_model( - "unsloth/Meta-Llama-3.1-8B-Instruct", - start_layer, - end_layer, - n_layers - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! 
meta-llama/Llama-3.1-8B-Instruct TEST FAILED \n{err}\n") From 5f085dc14e470b8fdf464cae18987305aee4243d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 14:48:00 -0900 Subject: [PATCH 505/589] optional caching as cache might not work with how sharding works --- exo/inference/torch/pt_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index e4f0ec755..02a830507 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -138,7 +138,7 @@ async def ensure_shard(self, shard: Shard): config=model_config, shard=shard, device=self.device, - use_cache=True + use_cache=os.environ.get("TORCH_USE_CACHE") if os.environ.get("TORCH_USE_CACHE") else False ) ) From f66773525ea76c23e4b7845350e24ae5b7ac3a73 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 14:51:47 -0900 Subject: [PATCH 506/589] fix cache assignment --- exo/inference/torch/models/llama3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 08c244edf..117025015 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -277,10 +277,10 @@ def __init__( # pad_id maually set as same in all llama models self.pad_id = 128004 # from <|finetune_right_pad_id|> - if use_cache: + if use_cache is not None: self.use_cache = use_cache else: - self.config.get("use_cache", False) + self.use_cache = self.config.get("use_cache", False) with torch.no_grad(): self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) From cb847e44e98e41515243452bfaea24832118dbec Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 15:35:28 -0900 Subject: [PATCH 507/589] fixing, set cache to false for inference for now --- exo/inference/torch/models/llama3.py | 6 +----- exo/inference/torch/pt_inference.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 117025015..009e512c7 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -273,15 +273,11 @@ def __init__( self.device = device if device is not None else torch.device("cpu") self.max_new_tokens = max_new_tokens self.max_seq_len = self.config["max_seq_len"] + self.use_cache = use_cache # pad_id maually set as same in all llama models self.pad_id = 128004 # from <|finetune_right_pad_id|> - if use_cache is not None: - self.use_cache = use_cache - else: - self.use_cache = self.config.get("use_cache", False) - with torch.no_grad(): self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 02a830507..702f7f620 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -143,10 +143,14 @@ async def ensure_shard(self, shard: Shard): ) # load sharded weights - load_model_weights_torchtune( - model_path, - shard, - self.sharded_model + await asyncio.get_running_loop().run_in_executor( + self.executor, + functools.partial( + load_model_weights_torchtune, + model_path, + shard, + self.sharded_model + ) ) if DEBUG >= 4: From 83259757b4f4a92e1d3216fc7fe4f32010120017 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 15:38:24 -0900 Subject: [PATCH 508/589] model double loading vram 
issue --- exo/inference/torch/pt_inference.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 702f7f620..3f926a1c6 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -119,6 +119,12 @@ def infer_wrapper(): return await asyncio.get_running_loop().run_in_executor(self.executor, infer_wrapper) async def ensure_shard(self, shard: Shard): + if DEBUG >= 4: + print("shard ensured\n") + print(f"model_path: {model_path}") + print(f"shard: {shard}") + print(f"class shard: {self.shard}") + if self.shard == shard: return @@ -152,9 +158,3 @@ async def ensure_shard(self, shard: Shard): self.sharded_model ) ) - - if DEBUG >= 4: - print("shard ensured\n") - print(f"model_path: {model_path}") - print(f"shard: {shard}") - print(f"model: {self.sharded_model}") From c8308b80aa60eb21c4f8c8a44c1d7818572f692d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 15:39:18 -0900 Subject: [PATCH 509/589] model double loading vram issue --- exo/inference/torch/pt_inference.py | 1 - 1 file changed, 1 deletion(-) diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 3f926a1c6..56ca6cc6e 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -121,7 +121,6 @@ def infer_wrapper(): async def ensure_shard(self, shard: Shard): if DEBUG >= 4: print("shard ensured\n") - print(f"model_path: {model_path}") print(f"shard: {shard}") print(f"class shard: {self.shard}") From 2dfce955d8f68fee4efe87397aab52f85004cf99 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 24 Nov 2024 15:41:17 -0900 Subject: [PATCH 510/589] setting class shard --- exo/inference/torch/pt_inference.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 56ca6cc6e..9a021f25f 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -126,6 +126,8 @@ async def ensure_shard(self, shard: Shard): if self.shard == shard: return + + self.shard = shard # download model safetensors and shard model_path = await self.shard_downloader.ensure_shard( From 39cfbf51d38001a9b80859de4f3828c964078546 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Tue, 26 Nov 2024 11:04:25 -0900 Subject: [PATCH 511/589] working on inference engine issue of too much vram --- exo/inference/torch/models/llama3.py | 17 ++++++++++------- exo/inference/torch/models/llm_utils.py | 6 +++--- exo/inference/torch/pt_inference.py | 4 ++-- exo/inference/torch/tests/test_llama3_full.py | 5 ++--- exo/inference/torch/tests/test_llama3_split.py | 4 ++-- .../torch/tests/test_pt_inference_engine.py | 2 +- exo/models.py | 2 +- 7 files changed, 21 insertions(+), 19 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 009e512c7..e086f0c39 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -132,13 +132,16 @@ def forward( # Process through each transformer layer with torch.no_grad(): - h = layer( - h, - # mask=mask, - # encoder_input=encoder_input, - # encoder_mask=encoder_mask, - # input_pos=input_pos, - ) + if self.layers[0].caches_are_enabled(): + h = layer( + h, + mask=mask, + encoder_input=encoder_input, + encoder_mask=encoder_mask, + input_pos=input_pos, + ) + else: + h = layer(h) if i in self.output_hidden_states: hidden.append(h) diff --git 
a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 7bbe44a73..b24b49504 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -169,9 +169,9 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # print(f"model.output.weight: {paried_embed_weight}") # remapped_state_dict["model.output.weight"] = paried_embed_weight - # print("\nRemapped state dict\n") - # for rsdk in remapped_state_dict.keys(): - # print(f"-- {rsdk}") + print("\nRemapped state dict\n") + for rsdk in remapped_state_dict.keys(): + print(f"-- {rsdk}") model.load_state_dict(remapped_state_dict, strict=False) # if DEBUG >= 7: diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 9a021f25f..51ab943b1 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -110,10 +110,10 @@ def infer_wrapper(): ) if model_hs is not None: - model_hs = model_hs.detach().cpu() + # model_hs = model_hs.detach().cpu() return model_hs.numpy(force=True) - model_logits = model_logits.detach().cpu() + # model_logits = model_logits.detach().cpu() return model_logits.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, infer_wrapper) diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index 860a6855d..84bec1cf9 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -111,12 +111,11 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) # Initialize LlamaModel with config and tokenizer - # device = torch.device("cuda") - device = None + device = torch.device("cuda") shard_model_1 = ShardedLlamaModel( config=config, shard=shard_1, - device=None, + device=device, max_new_tokens=MAX_NEW_TOKENS, use_cache=True ) diff --git a/exo/inference/torch/tests/test_llama3_split.py b/exo/inference/torch/tests/test_llama3_split.py index e9f596a8c..98f58d970 100644 --- a/exo/inference/torch/tests/test_llama3_split.py +++ b/exo/inference/torch/tests/test_llama3_split.py @@ -112,7 +112,7 @@ def test_generation_2(shard_model, in_tokens, hidden_state): shard_model_1 = ShardedLlamaModel( config=config, shard=shard_1, - device=None, + device=torch.device("cuda"), max_new_tokens=MAX_NEW_TOKENS, use_cache=True ) @@ -123,7 +123,7 @@ def test_generation_2(shard_model, in_tokens, hidden_state): shard_model_2 = ShardedLlamaModel( config=config, shard=shard_2, - device=None, + device=torch.device("cuda"), max_new_tokens=MAX_NEW_TOKENS, use_cache=True ) diff --git a/exo/inference/torch/tests/test_pt_inference_engine.py b/exo/inference/torch/tests/test_pt_inference_engine.py index abd5f9ea3..02c3ad4de 100644 --- a/exo/inference/torch/tests/test_pt_inference_engine.py +++ b/exo/inference/torch/tests/test_pt_inference_engine.py @@ -18,7 +18,7 @@ async def test_inference_engine(): shard = Shard( model_id="llama-3.2-1b", start_layer=0, - end_layer=15, + end_layer=8, n_layers=16 ) diff --git a/exo/models.py b/exo/models.py index 60525357b..22bc4f6f4 100644 --- a/exo/models.py +++ b/exo/models.py @@ -8,7 +8,7 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-1B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct", - "TorchDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct" + 
"TorchDynamicShardInferenceEngine": "meta-llama/Llama-3.2-1B-Instruct" }, }, "llama-3.2-3b": { From 185502afb9c7a6d88844903c5c338a634c5c4f1a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 27 Nov 2024 15:45:39 -0900 Subject: [PATCH 512/589] fixing vram issue from total_response_length being set with max_seq_len and not max_new_tokens --- exo/inference/torch/models/llama3.py | 2 +- exo/inference/torch/pt_inference.py | 6 +++++- exo/inference/torch/tests/test_pt_inference_engine.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index e086f0c39..bb84d786b 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -304,7 +304,7 @@ def generate( bsz, tokens_length = tokens.size() - total_response_length = tokens_length + self.max_seq_len + total_response_length = tokens_length + self.max_new_tokens # setup cache if not self.model.caches_are_enabled() and self.use_cache: diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 51ab943b1..4f813eda4 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -104,9 +104,13 @@ async def infer_tensor( self.request_id = request_id if not self.request_id else self.request_id def infer_wrapper(): + hidden_state = None + if input_data.ndim == 3: + hidden_state = torch.tensor(input_data).to(self.device) + model_hs, model_logits = self.sharded_model.generate( tokens=torch.tensor(self.past_tokens).to(self.device), - hidden_state=torch.tensor(input_data).to(self.device) + hidden_state=hidden_state ) if model_hs is not None: diff --git a/exo/inference/torch/tests/test_pt_inference_engine.py b/exo/inference/torch/tests/test_pt_inference_engine.py index 02c3ad4de..6d057ab83 100644 --- a/exo/inference/torch/tests/test_pt_inference_engine.py +++ b/exo/inference/torch/tests/test_pt_inference_engine.py @@ -18,7 +18,7 @@ async def test_inference_engine(): shard = Shard( model_id="llama-3.2-1b", start_layer=0, - end_layer=8, + end_layer=1, n_layers=16 ) From 738e931a706c5b3a708b27933e6c0e4e3f1f5943 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 29 Nov 2024 06:37:37 -0900 Subject: [PATCH 513/589] fixing split and full model generation, finetune for nodes and generation, working on exo inference engine --- exo/inference/torch/models/llama3.py | 148 +++++++++--------- exo/inference/torch/models/llm_utils.py | 15 +- exo/inference/torch/pt_inference.py | 46 ++++-- exo/inference/torch/tests/test_llama3_full.py | 39 +++-- .../torch/tests/test_llama3_split.py | 100 +++++++----- 5 files changed, 210 insertions(+), 138 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index bb84d786b..89154fa71 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -96,8 +96,6 @@ def forward( tokens: torch.Tensor, *, mask: Optional[_MaskType] = None, - encoder_input: Optional[torch.Tensor] = None, - encoder_mask: Optional[torch.Tensor] = None, input_pos: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor, List[torch.Tensor]]: # Determine the type of input and shape @@ -116,8 +114,6 @@ def forward( self._validate_inputs( seq_len, mask=mask, - encoder_input=encoder_input, - encoder_mask=encoder_mask, input_pos=input_pos, ) @@ -128,24 +124,27 @@ def forward( for i in range(self.shard.start_layer, self.shard.end_layer + 1): layer = self.layers[i] - print(f"\nhidden layer in 
H[{i}]\n{h}\nmask\n{mask}\ninput_pos\n{input_pos}\n{self.output_hidden_states}\n") + + if DEBUG >= 8: + print(f"\nhidden layer in H[{i}]\n{h}") + print(f"\nmask\n{mask}\ninput_pos\n{input_pos}") + print(f"\noutput_hidden_states\n{self.output_hidden_states}\n") # Process through each transformer layer with torch.no_grad(): - if self.layers[0].caches_are_enabled(): - h = layer( + if self.layers[self.shard.start_layer].caches_are_enabled(): + h = layer( h, mask=mask, - encoder_input=encoder_input, - encoder_mask=encoder_mask, input_pos=input_pos, ) else: h = layer(h) - if i in self.output_hidden_states: - hidden.append(h) + if i in self.output_hidden_states: + hidden.append(h) + if DEBUG >= 8: print(f"\nhidden layer out H[{i}]->H[{i + 1}]\n{h}\n") # Apply normalization @@ -159,7 +158,10 @@ def forward( # Return list if hidden states are requested output = [hidden[-1], output] if hidden else output - print(f"\n\noutput {output}\n\n") + + if DEBUG >= 4: + print(f"\n\noutput {output}\n\n") + return output @@ -289,7 +291,7 @@ def __init__( def generate( self, - tokens: torch.Tensor, + tokens: Optional[torch.Tensor] = None, hidden_state: Optional[torch.Tensor] = None ) -> Tuple[Optional[torch.Tensor], torch.Tensor]: """ @@ -299,80 +301,82 @@ def generate( tokens (torch.Tensor) - tokens from prompt tokenization and generation hidden_state (torch.Tensor, optional) - hidden state from last activated hidden layer, if any """ - if tokens.ndim == 1: - tokens = tokens.view(1, -1).to(device=self.device) + curr_masks = None + input_pos = None - bsz, tokens_length = tokens.size() + if tokens is not None: + if tokens.ndim == 1: + tokens = tokens.view(1, -1).to(device=self.device) - total_response_length = tokens_length + self.max_new_tokens + bsz, tokens_length = tokens.size() - # setup cache - if not self.model.caches_are_enabled() and self.use_cache: - with self.device: - self.model.setup_caches( - bsz, - self.dtype, - decoder_max_seq_len=tokens.numel() + self.max_new_tokens - ) + # using self.max_seq_len will take up alot of VRAM + total_response_length = tokens_length + self.max_seq_len - if not self.shard.is_last_layer(): - self.model.output_hidden_states = [self.shard.end_layer] + # setup cache + if not self.model.caches_are_enabled() and self.use_cache: + with self.device: + self.model.setup_caches( + bsz, + self.dtype, + decoder_max_seq_len=tokens.numel() + self.max_seq_len + ) - resp_max_seq_len = total_response_length if not self.model.caches_are_enabled() else self.model.decoder_max_cache_seq_len + if not self.shard.is_last_layer(): + self.model.output_hidden_states = [self.shard.end_layer] - # clone tokens - generated_tokens = tokens.clone().to(device=self.device) + resp_max_seq_len = total_response_length if not self.model.caches_are_enabled() else self.model.decoder_max_cache_seq_len - # masking for proper attention - padding_masks = generated_tokens != self.pad_id - if not padding_masks.all(): - padding_masks = torch.nn.functional.pad(padding_masks, (0, self.max_seq_len), value=True) + # clone tokens + generated_tokens = tokens.clone().to(device=self.device) - masks = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=resp_max_seq_len) + # masking for proper attention + padding_masks = generated_tokens != self.pad_id + if not padding_masks.all(): + padding_masks = torch.nn.functional.pad(padding_masks, (0, self.max_seq_len), value=True) - input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) - else: - masks = torch.tril( - torch.ones( - total_response_length, - 
resp_max_seq_len if resp_max_seq_len is not None else total_response_length, - dtype=torch.bool, - device=self.device, - ) - ).unsqueeze(0) + masks = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=resp_max_seq_len) - input_pos = torch.arange( - 0, - total_response_length, - device=self.device - ).unsqueeze(0) + input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) + else: + masks = torch.tril( + torch.ones( + total_response_length, + resp_max_seq_len if resp_max_seq_len is not None else total_response_length, + dtype=torch.bool, + device=self.device, + ) + ).unsqueeze(0) - if self.model.caches_are_enabled(): - curr_masks = masks[:, :tokens_length] - else: - curr_masks = masks[:, :tokens_length, :tokens_length] + input_pos = torch.arange( + 0, + total_response_length, + device=self.device + ).unsqueeze(0) + + if self.model.caches_are_enabled(): + curr_masks = masks[:, :tokens_length] + else: + curr_masks = masks[:, :tokens_length, :tokens_length] - input_pos = input_pos[:, :tokens_length].squeeze() + input_pos = input_pos[:, :tokens_length].squeeze() if DEBUG >= 4: print("model_input") - print(f"tokens: {tokens} - {tokens.device}") - print(f"mask: {curr_masks} - {curr_masks.device}") - print(f"input_pos: {input_pos} - {input_pos.device}") - - if hidden_state is not None: - print(f"hidden_state: {hidden_state} - {hidden_state.device}") - model_output = self.model( - tokens=hidden_state, - mask=curr_masks, - input_pos=input_pos, - ) - else: - model_output = self.model( - tokens=tokens, - mask=curr_masks, - input_pos=input_pos, - ) + if tokens is not None: + print(f"tokens: {tokens} - {tokens.device}") + print(f"mask: {curr_masks} - {curr_masks.device}") + print(f"input_pos: {input_pos} - {input_pos.device}") + + if hidden_state is not None: + print(f"hidden_state: {hidden_state} - {hidden_state.device}") + + + model_output = self.model( + tokens=hidden_state if hidden_state is not None else tokens, + mask=curr_masks, + input_pos=input_pos, + ) if DEBUG >= 4: print(f"model_output\n{model_output}") diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index b24b49504..5abb1428f 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -66,6 +66,12 @@ def load_model_config(model_config_path: Path) -> dict: "hidden_act": base_config.get("hidden_act", "silu") } + # the current max_position_embeddings requires a lot VRAM + # as it is over 13,000. 
Will require some logic to test if it + # exo can fit in the larger seq len + if model_config.get("rope_scaling", None) is not None: + model_config["max_seq_len"] = model_config["rope_scaling"]["original_max_position_embeddings"] + return model_config @@ -169,15 +175,16 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # print(f"model.output.weight: {paried_embed_weight}") # remapped_state_dict["model.output.weight"] = paried_embed_weight - print("\nRemapped state dict\n") - for rsdk in remapped_state_dict.keys(): - print(f"-- {rsdk}") + if DEBUG >= 4: + print("\nRemapped state dict\n") + for rsdk in remapped_state_dict.keys(): + print(f"-- {rsdk}") model.load_state_dict(remapped_state_dict, strict=False) # if DEBUG >= 7: # print("\n--- checking weights ----\n") # print(f"\nremapped_state_dict: {remapped_state_dict.keys()}\n") - check_weights(model, remapped_state_dict) + # check_weights(model, remapped_state_dict) class MultiLayerPreceptron(nn.Module): diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 4f813eda4..13d4d6e70 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -10,6 +10,7 @@ import asyncio import torch from torchtune.generation import sample as tt_sample +from torchtune.models import llama3 from exo.inference.inference_engine import InferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader @@ -33,7 +34,8 @@ def __init__(self, shard_downloader: HFShardDownloader): self.shard_downloader = shard_downloader self.request_id = None self.executor = ThreadPoolExecutor(max_workers=1) - self.past_tokens = [] + self.past_tokens = None + self.use_llama_tokenizer = os.environ.get("USE_LLAMA_TOKENIZER", False) # device settings if os.environ.get("TORCH_DEVICE"): @@ -56,16 +58,16 @@ async def encode(self, shard: Shard, prompt: str) -> np.ndarray: self.executor, functools.partial( self.tokenizer.encode, - prompt, - return_tensors="np" + prompt ) ) + if isinstance(tokens, list): + tokens = torch.tensor(tokens).to(device=self.device) + if DEBUG >= 4: print(f"tokens: {tokens}") - self.past_tokens = tokens.tolist() - return tokens async def decode(self, shard: Shard, tokens: np.ndarray) -> str: @@ -99,17 +101,29 @@ async def infer_tensor( input_data: np.ndarray, ) -> np.ndarray: # ensure shard + if DEBUG >= 4: + print("infer_tensor called") + print(f"shard: {shard}") + print(f"input_data: {input_data}") + print(f"self.past_tokens: {self.past_tokens}") await self.ensure_shard(shard) self.request_id = request_id if not self.request_id else self.request_id - def infer_wrapper(): - hidden_state = None - if input_data.ndim == 3: - hidden_state = torch.tensor(input_data).to(self.device) + hidden_state = None + if input_data.shape == (1, 1): + input_data = torch.tensor(input_data).to(self.device) + + if self.past_tokens is not None: + self.past_tokens = torch.cat((self.past_tokens, input_data), dim=-1).to(self.device) + else: + self.past_tokens = input_data.clone() + elif input_data.ndim == 3: + hidden_state = torch.tensor(input_data).to(self.device) + def infer_wrapper(): model_hs, model_logits = self.sharded_model.generate( - tokens=torch.tensor(self.past_tokens).to(self.device), + tokens=self.past_tokens if hidden_state is not None else None, hidden_state=hidden_state ) @@ -118,7 +132,8 @@ def infer_wrapper(): return model_hs.numpy(force=True) # model_logits = model_logits.detach().cpu() - return model_logits.numpy(force=True) + token = self.sample(model_logits, 
TEMP, TOP_K) + return token.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, infer_wrapper) @@ -140,7 +155,12 @@ async def ensure_shard(self, shard: Shard): ) model_config = load_model_config(model_path / "config.json") - self.tokenizer = await _resolve_tokenizer(model_path) + # self.tokenizer = await _resolve_tokenizer(model_path) + if self.use_llama_tokenizer: + llama_tokenizer_path = f"{model_path}/original/tokenizer.model" + self.tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) + else: + self.tokenizer = await _resolve_tokenizer(model_path) self.sharded_model = await asyncio.get_running_loop().run_in_executor( self.executor, @@ -149,7 +169,7 @@ async def ensure_shard(self, shard: Shard): config=model_config, shard=shard, device=self.device, - use_cache=os.environ.get("TORCH_USE_CACHE") if os.environ.get("TORCH_USE_CACHE") else False + use_cache=False ) ) diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index 84bec1cf9..ec2fc4396 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -24,7 +24,7 @@ MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" TEMP = 0.6 TOP_K = 25 -MAX_NEW_TOKENS = 10 +MAX_NEW_TOKENS = 2 def main(model, prompt: str, device: torch.device=torch.device("cpu")): # Tokenize input text @@ -38,17 +38,33 @@ def main(model, prompt: str, device: torch.device=torch.device("cpu")): tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) print(f"tokenizer_out: {tokenizer_out}") - tokens = torch.tensor(tokenizer_out["tokens"], dtype=torch.int, device=device) + tokens = torch.tensor([tokenizer_out["tokens"]], dtype=torch.int, device=device) + generated_tokens = tokens.clone() - _, logits = model.generate(tokens=tokens) + print(f"tokens: {tokens}") - tokens = ttg.sample(logits=logits[:, -1].clone(), temperature=TEMP, top_k=TOP_K) + for i in range(MAX_NEW_TOKENS): + print(f"gen #{i}") + _, logits = model.generate(tokens=tokens) - print(f"tokens: {tokens}") + tokens = ttg.sample(logits=logits[:, -1].clone(), temperature=TEMP, top_k=TOP_K) - generated_tokens = tokens.clone().tolist() - print(f"generated_tokens: {generated_tokens}") - print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens[0])}\n\n\n") + print(f"tokens: {tokens}") + + if tokens.item() in llama_tokenizer.stop_tokens or tokens.item() == llama_tokenizer.eos_id: + print("stop token hit!") + break + + generated_tokens = torch.cat([generated_tokens, tokens], dim=-1) + print(f"generated_tokens: {generated_tokens}") + + tokens = generated_tokens.clone() + + + + + + print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens.tolist()[0])}\n\n\n") def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu")): @@ -86,7 +102,8 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" if __name__ == "__main__": # prompt = "hello" - prompt = "What is the capital of france?" + prompt = "In a single word only, What is the capital of france?" + # prompt = "In a single word only, what is the last name of the current president of the USA?" 
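  # A rough, illustrative sketch of why the decoder cache length chosen in generate()
  # (prompt length + max_new_tokens vs. the model's full max_seq_len) matters for VRAM.
  # Dims below assume a Llama-3.2-1B-style config (16 layers, 8 KV heads, head_dim 64)
  # in fp16; adjust for the real model config.
  def approx_kv_cache_bytes(seq_len, n_layers=16, n_kv_heads=8, head_dim=64, dtype_bytes=2):
    # K and V caches together: 2 * layers * kv_heads * head_dim * dtype bytes per cached position
    return 2 * n_layers * n_kv_heads * head_dim * dtype_bytes * seq_len

  # ~32 KB per cached token under these assumptions: a prompt + MAX_NEW_TOKENS cache stays in the
  # low MBs, while caching out to a full 128K-token max_seq_len approaches 4 GB per sequence.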
# Get the path to the model files from the Hugging Face cache cache_dir = Path(snapshot_download(MODEL_NAME)) @@ -123,5 +140,5 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) - # main(shard_model_1, prompt, device) - normal_full(shard_model_1, prompt, device) + main(shard_model_1, prompt, device) + # normal_full(shard_model_1, prompt, device) diff --git a/exo/inference/torch/tests/test_llama3_split.py b/exo/inference/torch/tests/test_llama3_split.py index 98f58d970..6544d4419 100644 --- a/exo/inference/torch/tests/test_llama3_split.py +++ b/exo/inference/torch/tests/test_llama3_split.py @@ -22,42 +22,29 @@ MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" TEMP = 0.6 -TOP_K = 25 +TOP_K = 35 MAX_NEW_TOKENS=10 -def test_generation_1(shard_model, prompt): +def test_generation_1(shard_model, tokens): """ Test the generation capabilities of the LlamaModel with sample text. """ - # Tokenize input text - messages = [] - messages.extend([ - Message(role="system", content="You are a helpful and creative AI assistant."), - Message(role="user", content=prompt), - # Empty assistant message to kick-start generation - Message(role="assistant", content=""), - ]) - - print(f"last?: {shard_model.shard.is_last_layer()}") - tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) - print(f"tokenizer_out: {tokenizer_out}") - tokens = torch.tensor(tokenizer_out["tokens"], dtype=torch.int) hidden_states, _ = shard_model.generate(tokens) if hidden_states is not None: print(f"hidden_states[{len(hidden_states)}]: {hidden_states}") - return hidden_states, tokens - + return hidden_states -def test_generation_2(shard_model, in_tokens, hidden_state): +def test_generation_2(shard_model, hidden_state): print("Generate with the rest of layers") - hidden_states, logits = shard_model.generate(tokens=in_tokens, hidden_state=hidden_state) - - if hidden_states is not None: - print(f"hidden_states {hidden_states.shape}: {hidden_states}") + print(f"in hidden_states {hidden_state.shape}: {hidden_state}") + + _, logits = shard_model.generate( + hidden_state=hidden_state + ) if logits is not None: print(f"logits: {logits.shape}\n{logits}") @@ -78,18 +65,13 @@ def test_generation_2(shard_model, in_tokens, hidden_state): print(f"tokens: {tokens}") - generated_tokens = tokens.clone() - generated_tokens = generated_tokens.tolist() - - print(f"generated_tokens: {generated_tokens}") - - print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens[0])}\n\n\n") - + return tokens if __name__ == "__main__": print("\nTesting generation:") - prompt = "Hello, just say 'Hello' back nothing else" + # prompt = "In a single word only, what is the last name of the current president of the USA?" + prompt = "In a single word only, what is the capital of france?" 
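  # Outline of the two-shard flow exercised in the loop further below (illustrative summary,
  # using the helper functions defined earlier in this file):
  #   shard_1_hs = test_generation_1(shard_model_1, tokens)      # first shard's layers -> hidden state
  #   next_tok = test_generation_2(shard_model_2, shard_1_hs)    # remaining layers + output head -> sampled token
  #   generated_tokens = torch.cat([generated_tokens, next_tok], dim=-1)  # grow the sequence and repeat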
# Get the path to the model files from the Hugging Face cache cache_dir = Path(snapshot_download(MODEL_NAME)) @@ -109,24 +91,66 @@ def test_generation_2(shard_model, in_tokens, hidden_state): llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) # Initialize LlamaModel with config and tokenizer + device = torch.device("cuda") shard_model_1 = ShardedLlamaModel( config=config, shard=shard_1, - device=torch.device("cuda"), + device=device, max_new_tokens=MAX_NEW_TOKENS, - use_cache=True + use_cache=False ) - print(f"\nshard_model_1: {shard_model_1}") + load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) - shard_1_hs, shard_1_tokens = test_generation_1(shard_model_1, prompt) shard_model_2 = ShardedLlamaModel( config=config, shard=shard_2, - device=torch.device("cuda"), + device=device, max_new_tokens=MAX_NEW_TOKENS, - use_cache=True + use_cache=False ) - print(f"\nshard_model_2: {shard_model_2}") + load_model_weights_torchtune(cache_dir, shard_2, shard_model_2) - test_generation_2(shard_model_2, shard_1_tokens, shard_1_hs) + + # Tokenize input text + messages = [] + messages.extend([ + Message(role="system", content="You are a helpful and creative AI assistant."), + Message(role="user", content=prompt), + # Empty assistant message to kick-start generation + Message(role="assistant", content=""), + ]) + + tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) + print(f"tokenizer_out: {tokenizer_out}") + tokens = torch.tensor([tokenizer_out["tokens"]], dtype=torch.int).to(device=device) + + generated_tokens = tokens.clone().to(device=device) + + for i in range(MAX_NEW_TOKENS): + print(f"--------- gen #{i} ----------") + print(f"\n------------ {shard_1.start_layer} - {shard_1.end_layer} ----------\n") + + shard_1_hs = test_generation_1( + shard_model=shard_model_1, + tokens=tokens + ) + + print(f"\n out shard_1_hs {shard_1_hs}") + + print(f"\n------------ {shard_2.start_layer} - {shard_2.end_layer} ----------\n") + + tg2_token = test_generation_2(shard_model_2, shard_1_hs) + + if (tg2_token in llama_tokenizer.stop_tokens + or tg2_token == llama_tokenizer.eos_id): + print("hit stop token") + break + + generated_tokens = torch.cat([generated_tokens, tg2_token], dim=-1) + print(f"\ngenerated_tokens: {generated_tokens}") + + tokens = generated_tokens.clone() + +print("\n\n[resp from model]\n\n") +print(f"{llama_tokenizer.decode(generated_tokens.tolist()[0])}") From 907ba0b36669f24d3ddb858e6c61b22b8238765c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 30 Nov 2024 02:46:11 -0900 Subject: [PATCH 514/589] updated full test to generate to stop or max tokens for testing, updating split model test, trying to get inference engine to hit stop token --- exo/inference/torch/models/llama3.py | 7 +- exo/inference/torch/pt_inference.py | 71 ++++++++++++------- exo/inference/torch/tests/test_llama3_full.py | 46 +++++++----- .../torch/tests/test_llama3_split.py | 2 - 4 files changed, 76 insertions(+), 50 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 89154fa71..810e8b9b2 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -267,7 +267,6 @@ def __init__( config: dict, shard: Shard, device: Optional[torch.device] = None, - max_new_tokens: int = 10, use_cache: Optional[bool] = False ): super(ShardedLlamaModel, self).__init__() @@ -276,7 +275,6 @@ def __init__( self.config = config self.dtype = torch.float16 self.device = device if device is not None else 
torch.device("cpu") - self.max_new_tokens = max_new_tokens self.max_seq_len = self.config["max_seq_len"] self.use_cache = use_cache @@ -301,6 +299,11 @@ def generate( tokens (torch.Tensor) - tokens from prompt tokenization and generation hidden_state (torch.Tensor, optional) - hidden state from last activated hidden layer, if any """ + if DEBUG >= 4: + print("generate called") + print(f"tokens: {tokens}") + print(f"hidden_state: {hidden_state}") + curr_masks = None input_pos = None diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 13d4d6e70..660c49fd3 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -35,7 +35,6 @@ def __init__(self, shard_downloader: HFShardDownloader): self.request_id = None self.executor = ThreadPoolExecutor(max_workers=1) self.past_tokens = None - self.use_llama_tokenizer = os.environ.get("USE_LLAMA_TOKENIZER", False) # device settings if os.environ.get("TORCH_DEVICE"): @@ -50,7 +49,8 @@ def __init__(self, shard_downloader: HFShardDownloader): async def encode(self, shard: Shard, prompt: str) -> np.ndarray: if DEBUG >= 4: print("encode called") - print(f"shard: {shard}\nprompt: {prompt}") + print(f"shard: {shard}") + print(f"prompt: {prompt}") await self.ensure_shard(shard) @@ -63,7 +63,7 @@ async def encode(self, shard: Shard, prompt: str) -> np.ndarray: ) if isinstance(tokens, list): - tokens = torch.tensor(tokens).to(device=self.device) + tokens = torch.tensor([tokens]).to(device=self.device) if DEBUG >= 4: print(f"tokens: {tokens}") @@ -71,7 +71,13 @@ async def encode(self, shard: Shard, prompt: str) -> np.ndarray: return tokens async def decode(self, shard: Shard, tokens: np.ndarray) -> str: + if DEBUG >= 4: + print("decode called") + print(f"shard: {shard}") + print(f"tokens: {tokens}") + await self.ensure_shard(shard) + return await asyncio.get_running_loop().run_in_executor( self.executor, functools.partial( @@ -81,13 +87,24 @@ async def decode(self, shard: Shard, tokens: np.ndarray) -> str: ) async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: - logits = x[:, -1] + if DEBUG >= 4: + print("sample called") + print(f"x: {x}") + + logits = torch.tensor(x).to(self.device) def sample_wrapper(): - return tt_sample( - torch.tensor(logits), + tokens = tt_sample( + logits, temperature=temp, top_k=top_k - ).numpy(force=True) + ) + + if self.past_tokens is not None: + self.past_tokens = torch.cat([self.past_tokens, tokens], dim=-1).to(self.device) + else: + self.past_tokens = tokens.clone() + + return tokens.numpy(force=True) return await asyncio.get_running_loop().run_in_executor( self.executor, @@ -111,29 +128,33 @@ async def infer_tensor( self.request_id = request_id if not self.request_id else self.request_id hidden_state = None - if input_data.shape == (1, 1): - input_data = torch.tensor(input_data).to(self.device) - - if self.past_tokens is not None: - self.past_tokens = torch.cat((self.past_tokens, input_data), dim=-1).to(self.device) - else: - self.past_tokens = input_data.clone() - elif input_data.ndim == 3: + if input_data.ndim == 3: hidden_state = torch.tensor(input_data).to(self.device) + else: + input_data = torch.tensor(input_data).to(self.device) def infer_wrapper(): - model_hs, model_logits = self.sharded_model.generate( - tokens=self.past_tokens if hidden_state is not None else None, - hidden_state=hidden_state - ) + if DEBUG >= 4: + print("infer_wrapper called") + print(f"self.past_tokens: {self.past_tokens}") + print(f"hidden_state: 
{hidden_state}") + + if hidden_state is not None: + model_hs, model_logits = self.sharded_model.generate( + hidden_state=hidden_state + ) + else: + model_hs, model_logits = self.sharded_model.generate( + tokens=self.past_tokens if self.past_tokens is not None else input_data + ) if model_hs is not None: # model_hs = model_hs.detach().cpu() return model_hs.numpy(force=True) # model_logits = model_logits.detach().cpu() - token = self.sample(model_logits, TEMP, TOP_K) - return token.numpy(force=True) + # token = await self.sample(model_logits, TEMP, TOP_K) + return model_logits[:, -1].numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, infer_wrapper) @@ -156,11 +177,7 @@ async def ensure_shard(self, shard: Shard): model_config = load_model_config(model_path / "config.json") # self.tokenizer = await _resolve_tokenizer(model_path) - if self.use_llama_tokenizer: - llama_tokenizer_path = f"{model_path}/original/tokenizer.model" - self.tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) - else: - self.tokenizer = await _resolve_tokenizer(model_path) + self.tokenizer = await _resolve_tokenizer(model_path) self.sharded_model = await asyncio.get_running_loop().run_in_executor( self.executor, @@ -169,7 +186,7 @@ async def ensure_shard(self, shard: Shard): config=model_config, shard=shard, device=self.device, - use_cache=False + use_cache=True ) ) diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index ec2fc4396..e8ad9dcd9 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -11,6 +11,7 @@ from torchtune.models import llama3 from torchtune.data import Message +from transformers import AutoTokenizer from exo.inference.torch.models.llama3 import ShardedLlamaModel from exo.inference.shard import Shard @@ -28,17 +29,29 @@ def main(model, prompt: str, device: torch.device=torch.device("cpu")): # Tokenize input text - messages = [] - messages.extend([ - Message(role="system", content="You are a helpful and creative AI assistant."), - Message(role="user", content=prompt), - # Empty assistant message to kick-start generation - Message(role="assistant", content=""), - ]) + # messages = [] + # messages.extend([ + # Message(role="system", content="You are a helpful and creative AI assistant."), + # Message(role="user", content=prompt), + # # Empty assistant message to kick-start generation + # Message(role="assistant", content=""), + # ]) + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": prompt} + ] + + text = llama_tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + tok_out = llama_tokenizer([text], return_tensors="pt") + tokens = tok_out.input_ids.to(device=device) - tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) - print(f"tokenizer_out: {tokenizer_out}") - tokens = torch.tensor([tokenizer_out["tokens"]], dtype=torch.int, device=device) + # tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) + # print(f"tokenizer_out: {tokenizer_out}") + # tokens = torch.tensor([tokenizer_out["tokens"]], dtype=torch.int, device=device) generated_tokens = tokens.clone() print(f"tokens: {tokens}") @@ -51,7 +64,7 @@ def main(model, prompt: str, device: torch.device=torch.device("cpu")): print(f"tokens: {tokens}") - if tokens.item() in llama_tokenizer.stop_tokens or tokens.item() == llama_tokenizer.eos_id: + if 
tokens.item() == llama_tokenizer.eos_token_id: print("stop token hit!") break @@ -59,10 +72,6 @@ def main(model, prompt: str, device: torch.device=torch.device("cpu")): print(f"generated_tokens: {generated_tokens}") tokens = generated_tokens.clone() - - - - print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens.tolist()[0])}\n\n\n") @@ -99,7 +108,6 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens[0])}\n\n\n") - if __name__ == "__main__": # prompt = "hello" prompt = "In a single word only, What is the capital of france?" @@ -124,8 +132,9 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" ) # Initialize tokenizer - llama_tokenizer_path = f"{cache_dir}/original/tokenizer.model" - llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) + # llama_tokenizer_path = f"{cache_dir}/original/tokenizer.model" + # llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) + llama_tokenizer = AutoTokenizer.from_pretrained(cache_dir) # Initialize LlamaModel with config and tokenizer device = torch.device("cuda") @@ -133,7 +142,6 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" config=config, shard=shard_1, device=device, - max_new_tokens=MAX_NEW_TOKENS, use_cache=True ) print(f"\nshard_model_1: {shard_model_1}") diff --git a/exo/inference/torch/tests/test_llama3_split.py b/exo/inference/torch/tests/test_llama3_split.py index 6544d4419..d6fe12785 100644 --- a/exo/inference/torch/tests/test_llama3_split.py +++ b/exo/inference/torch/tests/test_llama3_split.py @@ -96,7 +96,6 @@ def test_generation_2(shard_model, hidden_state): config=config, shard=shard_1, device=device, - max_new_tokens=MAX_NEW_TOKENS, use_cache=False ) @@ -106,7 +105,6 @@ def test_generation_2(shard_model, hidden_state): config=config, shard=shard_2, device=device, - max_new_tokens=MAX_NEW_TOKENS, use_cache=False ) From b538cd25170721787663fd574ff0f36a88ba7295 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 30 Nov 2024 04:02:38 -0900 Subject: [PATCH 515/589] Merge branch 'main' of github.com:risingsunomi/exo-nvidia into pr139-dev-oct24 From 8c29d27212745f12416e63c79ce128e6c8ab787b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 30 Nov 2024 13:38:21 -0900 Subject: [PATCH 516/589] fixing inference sampling --- exo/inference/torch/pt_inference.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 660c49fd3..6f85f4c2a 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -90,19 +90,16 @@ async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: if DEBUG >= 4: print("sample called") print(f"x: {x}") + print(f"temp: {temp}") + print(f"top_k: {top_k}") logits = torch.tensor(x).to(self.device) def sample_wrapper(): tokens = tt_sample( logits, - temperature=temp, - top_k=top_k - ) - - if self.past_tokens is not None: - self.past_tokens = torch.cat([self.past_tokens, tokens], dim=-1).to(self.device) - else: - self.past_tokens = tokens.clone() + temperature=TEMP if TEMP > temp else temp, + top_k=TOP_K if TOP_K > top_k else top_k + ) return tokens.numpy(force=True) @@ -130,8 +127,13 @@ async def infer_tensor( hidden_state = None if input_data.ndim == 3: hidden_state = torch.tensor(input_data).to(self.device) - else: - input_data = 
torch.tensor(input_data).to(self.device) + elif input_data.ndim == 2: + input_tensor = torch.tensor(input_data).to(self.device) + + if self.past_tokens is not None: + self.past_tokens = torch.cat([self.past_tokens, input_tensor], dim=-1).to(self.device) + else: + self.past_tokens = input_tensor.clone() def infer_wrapper(): if DEBUG >= 4: @@ -145,7 +147,7 @@ def infer_wrapper(): ) else: model_hs, model_logits = self.sharded_model.generate( - tokens=self.past_tokens if self.past_tokens is not None else input_data + tokens=self.past_tokens ) if model_hs is not None: From 80122a0c2f4b6b4698359dd8a1b5a827482474ad Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 30 Nov 2024 15:00:26 -0900 Subject: [PATCH 517/589] changing back temp and top_k passing --- exo/inference/torch/pt_inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/pt_inference.py index 6f85f4c2a..3815ffcba 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/pt_inference.py @@ -97,8 +97,8 @@ async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: def sample_wrapper(): tokens = tt_sample( logits, - temperature=TEMP if TEMP > temp else temp, - top_k=TOP_K if TOP_K > top_k else top_k + temperature=temp, + top_k=top_k ) return tokens.numpy(force=True) From 3e0d1176a1dd6bc114e287448e7d63ee880c6773 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 30 Nov 2024 15:18:15 -0900 Subject: [PATCH 518/589] moving back to unsloth llama version for 3.2-1B --- exo/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/models.py b/exo/models.py index 22bc4f6f4..60525357b 100644 --- a/exo/models.py +++ b/exo/models.py @@ -8,7 +8,7 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-1B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct", - "TorchDynamicShardInferenceEngine": "meta-llama/Llama-3.2-1B-Instruct" + "TorchDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct" }, }, "llama-3.2-3b": { From 28d9900e34f97b13f319afbebc1bd16dc30df032 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 30 Nov 2024 16:49:50 -0900 Subject: [PATCH 519/589] cleaning up code, doing more testing as some bugs a bit still --- exo/inference/inference_engine.py | 2 +- exo/inference/torch/models/hf.py | 338 ------------------ .../torch/models/hf_safe_tensor_shard.py | 243 ------------- exo/inference/torch/models/llama3.py | 9 +- exo/inference/torch/models/llm_utils.py | 158 ++++---- ...ference.py => sharded_inference_engine.py} | 7 +- .../torch/tests/test_inference_engine.py | 54 +++ .../torch/tests/test_pt_inference_engine.py | 41 --- .../torch/tests/test_safetensor_json.py | 120 ------- .../torch/tests/test_safetensor_shard.py | 69 ---- .../torch/tests/test_simple_model.py | 50 --- exo/inference/torch/tests/utils.py | 185 ---------- exo/inference/torch/utils.py | 60 ---- 13 files changed, 128 insertions(+), 1208 deletions(-) delete mode 100644 exo/inference/torch/models/hf.py delete mode 100644 exo/inference/torch/models/hf_safe_tensor_shard.py rename exo/inference/torch/{pt_inference.py => sharded_inference_engine.py} (97%) create mode 100644 exo/inference/torch/tests/test_inference_engine.py delete mode 100644 exo/inference/torch/tests/test_pt_inference_engine.py delete mode 100644 exo/inference/torch/tests/test_safetensor_json.py delete mode 100644 exo/inference/torch/tests/test_safetensor_shard.py delete mode 100644 
exo/inference/torch/tests/test_simple_model.py delete mode 100644 exo/inference/torch/tests/utils.py delete mode 100644 exo/inference/torch/utils.py diff --git a/exo/inference/inference_engine.py b/exo/inference/inference_engine.py index 31e37d117..102ec1d17 100644 --- a/exo/inference/inference_engine.py +++ b/exo/inference/inference_engine.py @@ -51,7 +51,7 @@ def get_inference_engine(inference_engine_name: str, shard_downloader: 'ShardDow return TinygradDynamicShardInferenceEngine(shard_downloader) elif inference_engine_name == "torch": - from exo.inference.torch.pt_inference import TorchDynamicShardInferenceEngine + from exo.inference.torch.sharded_inference_engine import TorchDynamicShardInferenceEngine return TorchDynamicShardInferenceEngine(shard_downloader) elif inference_engine_name == "dummy": diff --git a/exo/inference/torch/models/hf.py b/exo/inference/torch/models/hf.py deleted file mode 100644 index 5d5b03e40..000000000 --- a/exo/inference/torch/models/hf.py +++ /dev/null @@ -1,338 +0,0 @@ -from typing import Tuple, Optional, Union, List -from pathlib import Path - -import torch -import torch.nn as nn - -from exo.inference.shard import Shard -from exo.helpers import DEBUG -from exo.inference.torch.models.hf_safe_tensor_shard import HFSafeTensorShard - -from transformers import ( - AutoModelForCausalLM, - DynamicCache, - Cache, - LogitsProcessorList, - TopKLogitsWarper, - TopPLogitsWarper, - TemperatureLogitsWarper -) - -# llama -from transformers.models.llama.modeling_llama import LlamaModel - -class ShardedHuggingFaceModel: - def __init__( - self, - shard: Shard, - local_model_path: Path, - weight_map: Optional[dict], - device: torch.device, - dtype: torch.dtype, - device_map: str, - top_k: int = 25, - temp: float = 0.7, - top_p: float = 0.9, - offload_buffers: bool = True - ): - """ - Initializes the ShardedHuggingFaceModel with a specified shard, model path, and device. - - Args: - shard (Shard): The model shard containing the start and end layers. - local_model_path (str): The local path to the model. - device (str): The device on which to run the model, e.g., "cuda" or "cpu". - dtype (torch.dtype): The data type (precision) to be used for model computations. - top_k (int, optional): The number of top tokens to consider for sampling. Defaults to 25. - temp (float, optional): The temperature for softmax sampling. Defaults to 0.7. - top_p (float, optional): The cumulative probability threshold for nucleus sampling. Defaults to 0.9. 
- """ - - # class vars - self.shard = shard - self.local_model_path = local_model_path - self.weight_map = weight_map - self.device = device - self.dtype = dtype - self.device_map = device_map - self.offload_buffers = offload_buffers - self.model_safetensors_path = self.local_model_path/"model.safetensors.index.json" - self.safetensor_sharder = HFSafeTensorShard( - self.local_model_path, - self.shard - ) - # setup logit processors - self.logits_processor = LogitsProcessorList([ - TopKLogitsWarper(top_k), - TemperatureLogitsWarper(temp), - TopPLogitsWarper(top_p) - ]) - - # setup sharded llm - try: - self.llm_model = self.load_sharded_model() - self.model = self.llm_model.model.to(self.device) - - # restore originals for next run, if one - self.safetensor_sharder.restore_backups() - except Exception as err: - print(f"error loading and sharding model: {err}") - raise - - # forward variables - self.hidden_states = None - self.input_ids = None - self.inputs_embeds = None - self.attention_mask = None - self.position_embeddings = None - self.past_key_values = None - self.cache_position = None - self.position_ids = None - self.causal_mask = None - - def load_sharded_model(self) -> AutoModelForCausalLM: - """ - Loads sharded version of model where only needed - weights are loaded for necessary layers - - Returns: - llm_model (AutoModelForCausalLM) - sharded llm model with only needed layers loaded - """ - if DEBUG >= 4: - print("load_sharded_model called") - - # modify safetensor - self.safetensor_sharder.modify_safetensor() - self.safetensor_sharder.create_safetensor_index() - self.safetensor_sharder.shard_safetensor_index(self.weight_map) - - # load model - try: - shard_num_hidden_layers = (self.shard.end_layer - self.shard.start_layer) + 1 - if DEBUG >= 4: - print(f"config with {shard_num_hidden_layers} layers") - - llm_model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path=self.local_model_path, - device_map=self.device_map, - torch_dtype=self.dtype, - offload_buffers=self.offload_buffers, - local_files_only=True, - num_hidden_layers=shard_num_hidden_layers, - use_safetensors=True, - low_cpu_mem_usage=True - ) - - # restore backup for next run - self.safetensor_sharder.restore_backups() - - if self.device_map == "auto": - return llm_model - else: - return llm_model.to(self.device) - - except Exception as err: - print(f"err: {err}") - raise - - def forward( - self, - input_ids: Optional[torch.Tensor] = None, - hidden_states: Optional[torch.Tensor] = None, - attention_mask: Optional[torch.Tensor] = None, - past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, - use_legacy_cache: bool = False - ) -> Tuple[Optional[torch.Tensor], Optional[Union[Cache, List[torch.FloatTensor]]], Optional[torch.Tensor]]: - """ - Performs a forward pass through the model shard, computing hidden states, past key values, and logits. - - Args: - input_ids (torch.Tensor, optional): The input token IDs for the model. Either input_ids or hidden_states must be provided. - hidden_states (torch.Tensor, optional): The hidden states of the model at the current layer. - attention_mask (torch.Tensor, optional): The attention mask to prevent attending to padding tokens. - past_key_values (Union[Cache, List[torch.FloatTensor]], optional): Cached past key values for fast autoregressive generation. - use_legacy_cache (bool, optional): Whether to use the legacy cache format for past key values. Defaults to False. 
- - Returns: - Tuple: - - hidden_states (torch.Tensor, optional): The hidden states after the forward pass. - - past_key_values (Union[Cache, List[torch.FloatTensor]], optional): The updated past key values. - - logits (torch.Tensor, optional): The logits produced by the model if the last layer is processed. - """ - model_inputs = None - self.hidden_states = hidden_states - self.input_ids = input_ids - - # if there is hidden states and no position_ids, will need to be calculated - # this is not needed for Qwen model but Llama requires it - - # embed input_ids - self.inputs_embeds = self.model.embed_tokens(self.input_ids) - - # cache - if past_key_values and not isinstance(past_key_values, Cache): - use_legacy_cache = True - past_key_values = DynamicCache.from_legacy_cache(past_key_values) - - past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + self.inputs_embeds.shape[1], - device=self.inputs_embeds.device - ) - - # position id - self.position_ids = cache_position.unsqueeze(0) - - if DEBUG >= 4: - print("hf forward called") - print(f"hidden_states: {self.hidden_states}") - print(f"input_ids: {self.input_ids}") - print(f"input_embeds: {self.inputs_embeds}") - print(f"position_ids: {self.position_ids}") - print(f"past_key_values: {past_key_values}") - - if self.hidden_states is None: - # casual mask and attention_mask - self.attention_mask = attention_mask - self.causal_mask = self.model._update_causal_mask( - None, - self.inputs_embeds, - cache_position, - past_key_values, - False # dont out attentions - ) - - # embed positions, some models require and some dont - if isinstance(self.model, LlamaModel): - self.position_embeddings = self.model.rotary_emb( - self.inputs_embeds, - self.position_ids - ) - - # prepare inputs for decoder layers - model_inputs = self.llm_model.prepare_inputs_for_generation( - self.input_ids, - past_key_values=past_key_values, - attention_mask=self.attention_mask, - inputs_embeds=self.inputs_embeds, - position_ids=self.position_ids, - cache_position=cache_position - ) - - self.hidden_states = self.inputs_embeds - self.position_ids = model_inputs["position_ids"] - self.cache_position = model_inputs["cache_position"] - self.past_key_values = model_inputs["past_key_values"] - - if DEBUG >= 4: - print(f"model_inputs: {model_inputs}") - - # run through decoder layers - layer_amt = range(self.shard.end_layer - self.shard.start_layer) - - if DEBUG >= 4: - print(f"hidden_states: {self.hidden_states}") - print(f"model layer amt: {len(self.model.layers)}") - print(f"layer_amt: {layer_amt}") - - for i in layer_amt: - decoder_layer = self.model.layers[i] - if DEBUG >= 5: - print(f"layer #{i}") - print("decoder_layer before") - print(f"decoder_layer: {decoder_layer}") - print(f"hidden_states: {self.hidden_states}") - print(f"position_ids: {self.position_ids}") - print(f"position_embeddings: {self.position_embeddings}") - - # TODO: fix caching as decoder layer is not returning - # present_key_value from attention layer on models - # might have some other generation functions needed to do it - # see https://github.com/huggingface/transformers/blob/main/src/transformers/generation/utils.py#L2917 - # for qwen2 exhttps://github.com/huggingface/transformers/blob/main/src/transformers/models/qwen2/modeling_qwen2.py#L291 - layer_outputs = decoder_layer( - self.hidden_states, - attention_mask=self.causal_mask, - position_ids=self.position_ids, - 
past_key_values=self.past_key_values, - use_cache=True, - cache_position=self.cache_position - ) - - self.hidden_states = layer_outputs[0] - self.next_decoder_cache = layer_outputs[1] - - if DEBUG >= 5: - print("decoder_layer after") - print(f"layer_outputs: {layer_outputs}\n") - print(f"self.next_decoder_cache: {self.next_decoder_cache}") - print(f"hidden_states: {self.hidden_states}") - print(f"next_decoder_cache: {self.next_decoder_cache}") - - # handle last layer to get logits - # shard is last layer says true at the start and not detecting last layer correctly - if self.shard.is_last_layer(): - self.hidden_states = self.model.norm(self.hidden_states) - if use_legacy_cache: - self.past_key_values = self.next_decoder_cache.to_legacy_cache() - else: - self.past_key_values = self.next_decoder_cache - - # lm_head - logits = self.llm_model.lm_head(self.hidden_states).to(self.device) - - if DEBUG >= 4: - print(f"logits: {logits}") - - return ( - None, - None, - logits - ) - - if DEBUG >= 4: - print("hf out [no logit]") - print(f"hidden_states: {self.hidden_states}") - print(f"past_key_values: {self.past_key_values}") - print(f"position_ids: {self.position_ids}") - print(f"input_ids: {self.input_ids}") - - return ( - self.hidden_states, - self.past_key_values, - None - ) - - def logits_sample( - self, - logits: torch.Tensor, - use_max: Optional[bool] = False - ) -> torch.Tensor: - """ - Samples the next token from the model's output logits, either by using argmax or probabilistic sampling. - - Args: - logits (torch.Tensor): The logits output from the model's final layer. - use_max (bool, optional): If True, uses torch.argmax to select the next token from logits. Defaults to False. - - Returns: - torch.Tensor: The next predicted token. - """ - - # get a single cloned logit - logits = logits[:, -1, :].clone().float() - - next_token_scores = self.logits_processor(self.input_ids, logits) - - if not use_max: - probs = nn.functional.softmax(next_token_scores, dim=-1) - next_token = torch.multinomial(probs, num_samples=1) - else: - next_token = torch.argmax(next_token_scores, dim=-1) - - if DEBUG >= 4: - print(f"input_ids: {self.input_ids}") - print(f"next_token: {next_token}") - - return next_token[:, None].squeeze(-1) diff --git a/exo/inference/torch/models/hf_safe_tensor_shard.py b/exo/inference/torch/models/hf_safe_tensor_shard.py deleted file mode 100644 index c3afdea52..000000000 --- a/exo/inference/torch/models/hf_safe_tensor_shard.py +++ /dev/null @@ -1,243 +0,0 @@ -""" -HuggingFace Safetensor Shard -Sharding of safetensors to only use weights of models needed -""" -import os -import shutil -import json - -from typing import Optional -from pathlib import Path - -from safetensors import safe_open -from safetensors.torch import save_file - -import torch - -from exo.inference.shard import Shard -from exo.helpers import DEBUG -from exo.inference.torch.utils import extract_layers - -class HFSafeTensorShard: - def __init__(self, model_path: Path, shard: Shard): - self.model_path = model_path - self.shard = shard - self.safetensors_path = self.get_safetensors() - self.safetensor_index_path = f"{self.model_path}/model.safetensors.index.json" - self.metadata = { - "metadata": { - "total_size": 0 - }, - "weight_map": {} - } - - def get_safetensors(self) -> list: - """ - Gets a list of all files that have the extension .safetensors - - Return: - list: A list of all the safetensors file paths - """ - safetensors_path = [] - try: - for file_name in os.listdir(self.model_path): - if 
file_name.endswith(".safetensors"): - safetensor_path = os.path.join(self.model_path, file_name) - if safetensor_path not in safetensors_path: - safetensors_path.append(safetensor_path) - except Exception as err: - print(f"Error in get_safetensor_path: {err}") - raise - - return safetensors_path - - def backup_safetensor(self): - try: - for safetensor_path in self.safetensors_path: - backup_path = safetensor_path+".backup" - if not os.path.exists(backup_path): - shutil.copy(safetensor_path, backup_path) - - if DEBUG >= 4: - print(f"Backup created at {backup_path}") - except Exception as err: - print(f"Error in backup_safetensor: {err}") - raise - - def modify_safetensor(self): - """ - Extract needed weights for layers from safetensor files - and create a new safetensor with same names - """ - try: - self.backup_safetensor() - safetensor_is_used = False - for safetensor_path in self.safetensors_path: - initial_size = os.path.getsize(safetensor_path) - with safe_open(safetensor_path, framework="pt") as f: - metadata = f.metadata() - new_tensors = {} - - # Iterate over tensors, including only those within the specified layer range - for key in f.keys(): - layer_number = self.extract_layer_number(key) - if self.shard.start_layer <= layer_number <= self.shard.end_layer: - if DEBUG >= 4: - print(f"modify_safetensor [{layer_number}] extracting {key}") - new_tensors[key] = f.get_tensor(key) - safetensor_is_used = True - - # Save the modified safetensor - if safetensor_is_used: - save_file(new_tensors, safetensor_path, metadata) - modified_size = os.path.getsize(safetensor_path) - - if DEBUG >= 4: - print(f"Safetensor modified and saved to {safetensor_path}") - print(f"Initial size: {initial_size / (1024**3):.2f} GB") - print(f"Modified size: {modified_size / (1024**3):.2f} GB") - else: - # remove unused safetensors - os.remove(safetensor_path) - - if DEBUG >= 4: - print(f"Removed safetensor: {safetensor_path}") - except Exception as err: - print(f"Error modifying safetensor: {err}") - raise - - def extract_layer_number(self, key): - """ - Extract the layer number from a tensor key. - This function assumes keys follow the format 'model.layers..'. - """ - try: - parts = key.split(".") - layer_idx = 0 - if parts[0] == "model" and parts[1] == "layers": - layer_idx = int(parts[2]) - return layer_idx - #layer_idx = next(i for i, part in enumerate(parts) if part.startswith("h")) - #return int(parts[layer_idx + 1]) - except (IndexError, ValueError) as err: - print(f"Error extracting layer number from key '{key}': {err}") - return -1 - - def create_safetensor_index(self): - """ - Creates a model.safetensors.index.json file from a list of safetensor files. 
- """ - if os.path.exists(self.safetensor_index_path): - backup_index_path = f"{self.model_path}/model.safetensors.index.json.backup" - if not os.path.exists(backup_index_path): - shutil.copy(self.safetensor_index_path, backup_index_path) - - if DEBUG >= 4: - print(f"backed up index json {self.safetensor_index_path}") - - if self.safetensors_path: - # initialize the metadata and weight_map - for safetensor_file in self.safetensors_path: - # use the safetensor file name as the shard_name - shard_name = os.path.basename(safetensor_file) - - # open the safetensor file to read the metadata - with safe_open(safetensor_file, framework="pt", device="cpu") as f: - # get tensor names - tensor_names = f.keys() - - # collect metadata for each tensor - for name in tensor_names: - tensor_data = f.get_tensor(name) - shape = tensor_data.shape - dtype = tensor_data.dtype - - # calculate the tensor size in bytes based on dtype - total_elements = 1 - for dim in shape: - total_elements *= dim - - if dtype == torch.float32: - element_size = 4 - elif dtype == torch.float16 or dtype == torch.bfloat16: - element_size = 2 - # extend this to support more data types if needed - else: - raise ValueError(f"unsupported dtype: {dtype}") - - tensor_size = total_elements * element_size - self.metadata["metadata"]["total_size"] += tensor_size - - # add to weight_map, mapping the tensor to the shard (file) name - self.metadata["weight_map"][name] = shard_name - - # write the metadata and weight map to the index file - with open(self.safetensor_index_path, "w") as f: - json.dump(self.metadata, f, indent=4) - - if DEBUG >= 4: - print(f"created new {self.safetensor_index_path}") - else: - print("No safetensor files provided.") - - def shard_safetensor_index(self, weight_map: Optional[dict] = None): - """ - Modify the weight_map of the safetensors index json to only - get weights for the working layers - - Args: - weight_map(dict, Optional): holds which weight maps to which layer - """ - if weight_map is None: - weight_map = self.metadata["weight_map"] - - layer_weight_map = extract_layers( - weight_map, - self.shard - ) - - # rewrite model.safetensors.index.json for only needed layers - try: - mst_json = {} - with open(self.safetensor_index_path, "r") as mst_file: - mst_json = json.load(mst_file) - mst_json["weight_map"] = layer_weight_map - - if DEBUG >= 4: - print(f"new safetensor index\n{json.dumps(mst_json, indent=4)}\n") - - os.remove(self.safetensor_index_path) - - with open(self.safetensor_index_path, "w") as mst_file: - json.dump(mst_json, mst_file, indent=4) - except Exception as err: - print(f"err: {err}") - raise - - def restore_backups(self): - """ - Restore the original safetensor and index json, if any, from the backup file. 
- """ - try: - for safetensor_path in self.safetensors_path: - backup_path = safetensor_path+".backup" - if os.path.exists(backup_path): - os.remove(safetensor_path) - shutil.copy(backup_path, safetensor_path) - os.remove(backup_path) - - if DEBUG >= 4: - print(f"Safetensor restored from backup at {backup_path}") - - backup_index_path = self.safetensor_index_path+".backup" - if os.path.exists(backup_index_path): - os.remove(self.safetensor_index_path) - shutil.copy(backup_index_path, self.safetensor_index_path) - os.remove(backup_index_path) - - if DEBUG >= 4: - print(f"Safetensor index JSON restored from backup at {backup_index_path}") - except Exception as err: - print(f"Error in restore_backup: {err}") - raise - diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 810e8b9b2..0062d6bfb 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -14,7 +14,7 @@ from torchtune.modules.attention_utils import _MaskType from exo.inference.shard import Shard -from exo.inference.torch.models.llm_utils import MultiLayerPreceptron, RMSNorm, get_torch_dtype +from exo.inference.torch.models.llm_utils import MultiLayerPreceptron, RMSNorm from exo.helpers import DEBUG @@ -91,6 +91,8 @@ def caches_are_enabled(self) -> bool: if layer is not None: return layer.caches_are_enabled() + return False + def forward( self, tokens: torch.Tensor, @@ -284,8 +286,9 @@ def __init__( with torch.no_grad(): self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) - print(f"model loaded: {self.model}\n") - print(f"device: {self.device}\n") + if DEBUG >= 8: + print(f"model loaded: {self.model}\n") + print(f"device: {self.device}\n") def generate( self, diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 5abb1428f..6a5364e10 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -5,36 +5,17 @@ import re import json from pathlib import Path -from typing import Any, Optional, Tuple +from typing import Any import torch import torch.nn as nn -import torch.nn.functional as F -import torchtune.modules as ttm -from torchtune.models.convert_weights import hf_to_tune -import math from safetensors.torch import load_file as load_safetensors -from transformers import LogitsProcessorList, TopKLogitsWarper, TopPLogitsWarper, TemperatureLogitsWarper -from transformers.cache_utils import Cache, DynamicCache - from exo.helpers import DEBUG from exo.inference.shard import Shard -def get_torch_dtype(dtype_str: str) -> torch.dtype: - """ - Get dtype from setting in model's config.json - """ - if dtype_str == "bfloat16": - return torch.bfloat16 - elif dtype_str == "float16": - return torch.float16 - else: - return torch.float16 - - def load_model_config(model_config_path: Path) -> dict: """ Loads the config.json of the model @@ -66,9 +47,6 @@ def load_model_config(model_config_path: Path) -> dict: "hidden_act": base_config.get("hidden_act", "silu") } - # the current max_position_embeddings requires a lot VRAM - # as it is over 13,000. 
Will require some logic to test if it - # exo can fit in the larger seq len if model_config.get("rope_scaling", None) is not None: model_config["max_seq_len"] = model_config["rope_scaling"]["original_max_position_embeddings"] @@ -82,7 +60,6 @@ def check_weights(model, state_dict): model_state_dict = model.state_dict() for name, param in model_state_dict.items(): if name in state_dict: - # print(f"\nchecking {name}\n") loaded_param = state_dict[name] if param.shape != loaded_param.shape: print(f"Shape mismatch for {name}: expected {param.shape}, got {loaded_param.shape}") @@ -104,10 +81,8 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): raise FileNotFoundError("No safetensors files found in the cache directory.") # Load weights from each found safetensors file - paried_lmhead = True - shard_layer_range = list(range(shard.start_layer, shard.end_layer)) - - full_state_dict = None + + full_state_dict = {} for safetensor_file in safetensors_files: state_dict = load_safetensors(safetensor_file) @@ -118,74 +93,71 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # remap to work with our model remapped_state_dict = {} - paried_embed_weight = None - for key, value in full_state_dict.items(): - # load layer by shard - for layer_num in range(shard.start_layer, shard.end_layer + 1): - # change input layer norm to sa_norm for torchtune - re_iln = re.findall(rf"model.layers\.{layer_num}\.(input_layernorm)\.weight", key) - if len(re_iln) != 0: - new_key = f"model.layers.{layer_num}.sa_norm.weight" - # print(f"{key} == {new_key}") - remapped_state_dict[new_key] = value - - # change post attention layernorm to mlp_norm for torchtune - re_pal = re.findall(rf"model.layers\.{layer_num}\.(post_attention_layernorm)\.weight", key) - if len(re_pal) != 0: - new_key = f"model.layers.{layer_num}.mlp_norm.weight" - # print(f"{key} == {new_key}") - remapped_state_dict[new_key] = value - - # change self_attn to attn - # along with changing o_proj to output_proj - re_attn = re.findall(rf"model\.layers\.{layer_num}.(\w+)\.(\w+)\.(\w+)", key) - if len(re_attn) != 0 and re_attn[0][0] == "self_attn": - if re_attn[0][1] == "o_proj": - new_key = f"model.layers.{layer_num}.attn.output_proj.weight" - # print(f"{key} == {new_key}") + + if "llama" in shard.model_id: + for key, value in full_state_dict.items(): + # load layer by shard + for layer_num in range(shard.start_layer, shard.end_layer + 1): + # change input layer norm to sa_norm for torchtune + re_iln = re.findall(rf"model.layers\.{layer_num}\.(input_layernorm)\.weight", key) + if len(re_iln) != 0: + new_key = f"model.layers.{layer_num}.sa_norm.weight" + remapped_state_dict[new_key] = value + if DEBUG >= 8: + print(f"{key} == {new_key}") + + # change post attention layernorm to mlp_norm for torchtune + re_pal = re.findall(rf"model.layers\.{layer_num}\.(post_attention_layernorm)\.weight", key) + if len(re_pal) != 0: + new_key = f"model.layers.{layer_num}.mlp_norm.weight" remapped_state_dict[new_key] = value - else: - new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" - # print(f"{key} == {new_key}") + if DEBUG >= 8: + print(f"{key} == {new_key}") + + # change self_attn to attn + # along with changing o_proj to output_proj + re_attn = re.findall(rf"model\.layers\.{layer_num}.(\w+)\.(\w+)\.(\w+)", key) + if len(re_attn) != 0 and re_attn[0][0] == "self_attn": + if re_attn[0][1] == "o_proj": + new_key = f"model.layers.{layer_num}.attn.output_proj.weight" + remapped_state_dict[new_key] = value 
+ else: + new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" + remapped_state_dict[new_key] = value + if DEBUG >= 8: + print(f"{key} == {new_key}") + + # set mlp weights + re_mlp = re.findall(rf"model\.layers\.{layer_num}.mlp.(\w+)\.(\w+)", key) + if len(re_mlp) != 0: + new_key = f"model.layers.{layer_num}.mlp.{re_mlp[0][0]}.{re_mlp[0][1]}" remapped_state_dict[new_key] = value - - # set mlp weights - re_mlp = re.findall(rf"model\.layers\.{layer_num}.mlp.(\w+)\.(\w+)", key) - if len(re_mlp) != 0: - new_key = f"model.layers.{layer_num}.mlp.{re_mlp[0][0]}.{re_mlp[0][1]}" - # print(f"load mlp {key}") - remapped_state_dict[new_key] = value - - # saving embed for paired weights - if key == "model.embed_tokens.weight": - # paried_embed_weight = value - # change name for torchtune - # print("model.embed_tokens.weight == model.tok_embeddings.weight") - remapped_state_dict["model.tok_embeddings.weight"] = value - - # elif key == "lm_head.weight": - # paried_lmhead = False - - # get everything else except layers, embed_tokens and lm_head - #if len(re.findall(r"model\.layers\..*", key)) == 0 and key != "model.embed_tokens.weight" and key != "lm_head.weight": - # print(f"loading other weight: {key}") - #remapped_state_dict[key] = value - - # if paried_lmhead: - # print(f"model.output.weight: {paried_embed_weight}") - # remapped_state_dict["model.output.weight"] = paried_embed_weight - - if DEBUG >= 4: - print("\nRemapped state dict\n") - for rsdk in remapped_state_dict.keys(): - print(f"-- {rsdk}") - model.load_state_dict(remapped_state_dict, strict=False) - - # if DEBUG >= 7: - # print("\n--- checking weights ----\n") - # print(f"\nremapped_state_dict: {remapped_state_dict.keys()}\n") - # check_weights(model, remapped_state_dict) + if DEBUG >= 8: + print(f"{key} == {new_key}") + + # saving embed for paired weights + if key == "model.embed_tokens.weight": + remapped_state_dict["model.tok_embeddings.weight"] = value + if DEBUG >= 8: + print("model.embed_tokens.weight == model.tok_embeddings.weight") + else: + print(f"{shard.model_id} not supported for sharding, loading weights normally") + if not remapped_state_dict: + model.load_state_dict(full_state_dict, strict=True) + else: + if DEBUG >= 8: + print("\nRemapped state dict\n") + for rsdk in remapped_state_dict.keys(): + print(f"-- {rsdk}") + + # load new weight map + model.load_state_dict(remapped_state_dict, strict=False) + + if DEBUG >= 8: + print("\n--- checking weights ----\n") + print(f"\nremapped_state_dict: {remapped_state_dict.keys()}\n") + check_weights(model, remapped_state_dict) class MultiLayerPreceptron(nn.Module): def __init__(self, input_dim, hidden_dim, activation="silu", use_bias=False): diff --git a/exo/inference/torch/pt_inference.py b/exo/inference/torch/sharded_inference_engine.py similarity index 97% rename from exo/inference/torch/pt_inference.py rename to exo/inference/torch/sharded_inference_engine.py index 3815ffcba..0362c505a 100644 --- a/exo/inference/torch/pt_inference.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -10,7 +10,6 @@ import asyncio import torch from torchtune.generation import sample as tt_sample -from torchtune.models import llama3 from exo.inference.inference_engine import InferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader @@ -58,13 +57,11 @@ async def encode(self, shard: Shard, prompt: str) -> np.ndarray: self.executor, functools.partial( self.tokenizer.encode, - prompt + prompt, + return_tensors="np" ) ) - if isinstance(tokens, list): - 
tokens = torch.tensor([tokens]).to(device=self.device) - if DEBUG >= 4: print(f"tokens: {tokens}") diff --git a/exo/inference/torch/tests/test_inference_engine.py b/exo/inference/torch/tests/test_inference_engine.py new file mode 100644 index 000000000..cf1f06179 --- /dev/null +++ b/exo/inference/torch/tests/test_inference_engine.py @@ -0,0 +1,54 @@ +""" +Test inference engine and model sharding +""" +import pytest +import asyncio + +from exo.inference.shard import Shard +from exo.inference.torch.sharded_inference_engine import TorchDynamicShardInferenceEngine +from exo.download.hf.hf_shard_download import HFShardDownloader + +import numpy as np + +@pytest.mark.asyncio +async def test_inference_engine(): + prompt = "In a single word only, what is the last name of the current president of the USA?" + + shard = Shard( + model_id="llama-3.2-1b", + start_layer=0, + end_layer=8, + n_layers=16 + ) + + shard_2 = Shard( + model_id="llama-3.2-1b", + start_layer=9, + end_layer=15, + n_layers= 16 + ) + + inference_engine = TorchDynamicShardInferenceEngine(HFShardDownloader()) + + output_1 = await inference_engine.infer_prompt("test_id", shard, prompt) + print("\n------------inference_engine.infer_prompt output---------------\n") + print(output_1) + print("\n---------------------------\n") + + assert isinstance(output_1, np.ndarray), "Output should be numpy array" + + output_2 = await inference_engine.infer_tensor("test_id", shard, output_1) + print("\n------------inference_engine.infer_tensor output---------------\n") + print(output_2) + print("\n---------------------------\n") + + assert isinstance(output_2, np.ndarray), "Output should be numpy array" + +if __name__ == '__main__': + try: + print("\n\n -------- TEST llama-3.2-1b -------- \n\n") + asyncio.run(test_inference_engine()) + except Exception as err: + print(f"\n!!!! TEST FAILED \n{err}\n") + + diff --git a/exo/inference/torch/tests/test_pt_inference_engine.py b/exo/inference/torch/tests/test_pt_inference_engine.py deleted file mode 100644 index 6d057ab83..000000000 --- a/exo/inference/torch/tests/test_pt_inference_engine.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -Test inference engine and model sharding -""" -import pytest -import asyncio - -from exo.inference.shard import Shard -from exo.inference.torch.pt_inference import TorchDynamicShardInferenceEngine -from exo.download.hf.hf_shard_download import HFShardDownloader -from exo.inference.inference_engine import InferenceEngine - -import numpy as np - -@pytest.mark.asyncio -async def test_inference_engine(): - prompt = "In a single word only, what is the last name of the current president of the USA?" - - shard = Shard( - model_id="llama-3.2-1b", - start_layer=0, - end_layer=1, - n_layers=16 - ) - - inference_engine = TorchDynamicShardInferenceEngine(HFShardDownloader()) - - output = await inference_engine.infer_prompt("test_id", shard, prompt) - print("\n------------inference_engine output---------------\n") - print(output) - print("\n---------------------------\n") - - assert isinstance(output, np.ndarray), "Output should be numpy array" - -if __name__ == '__main__': - try: - print("\n\n -------- TEST unsloth/Llama-3.2-1B-Instruct -------- \n\n") - asyncio.run(test_inference_engine()) - except Exception as err: - print(f"\n!!!! 
LLAMA TEST FAILED \n{err}\n") - - diff --git a/exo/inference/torch/tests/test_safetensor_json.py b/exo/inference/torch/tests/test_safetensor_json.py deleted file mode 100644 index 3ec02c715..000000000 --- a/exo/inference/torch/tests/test_safetensor_json.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -Create a model.safetensors.index.json from safetensors -""" -import json -import os - -import asyncio - -from safetensors import safe_open - -from exo.download.hf.hf_shard_download import HFShardDownloader -from exo.inference.shard import Shard - -import torch - -def create_safetensor_index(safetensor_files: list, index_file: str): - """ - Creates a model.safetensors.index.json file from a list of safetensor files. - - Args: - safetensor_files (list): List of paths to the safetensor files. - index_file (str): Path where the index JSON file should be saved. - - Raises: - ValueError: If an unsupported data type is encountered. - """ - if safetensor_files: - # Initialize the metadata and weight_map - metadata = { - "metadata": { - "total_size": 0 - }, - "weight_map": {} - } - - for safetensor_file in safetensor_files: - # Use the safetensor file name as the shard_name - shard_name = os.path.basename(safetensor_file) - - # Open the safetensor file to read the metadata - with safe_open(safetensor_file, framework="pt") as f: - # Get tensor names - tensor_names = f.keys() - - # Collect metadata for each tensor - for name in tensor_names: - tensor_data = f.get_tensor(name) - print(f"tensor_data: {tensor_data}") - shape = tensor_data.shape - dtype = tensor_data.dtype - print(f"shape: {shape}") - print(f"dtype: {str(dtype) == "torch.bfloat16"}") - - # Calculate the tensor size in bytes based on dtype - total_elements = 1 - for dim in shape: - total_elements *= dim - - if dtype == torch.float32: - element_size = 4 - elif dtype == torch.float16 or dtype == torch.bfloat16: - element_size = 2 - # Extend this to support more data types if needed - else: - raise ValueError(f"Unsupported dtype: {dtype}") - - tensor_size = total_elements * element_size - metadata["metadata"]["total_size"] += tensor_size - - # Add to weight_map, mapping the tensor to the shard (file) name - metadata["weight_map"][name] = shard_name - - # Write the metadata and weight map to the index file - with open(index_file, "w") as f: - json.dump(metadata, f, indent=4) - - print(f"Index file created: {index_file}") - else: - print("No safetensor files provided.") - - -async def main(): - """ - Main asynchronous function to download the model shard and create an index file for safetensors. - - This function downloads a model shard from Hugging Face, identifies safetensor files, and - generates a corresponding index file using the `create_safetensor_index` function. 
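The per-tensor byte accounting in the deleted create_safetensor_index above can also be expressed without a dtype-to-byte-width table, since torch reports element sizes directly. A small sketch under that assumption, with safetensor_total_bytes as a hypothetical helper name:

from safetensors import safe_open

def safetensor_total_bytes(path: str) -> int:
  # Hypothetical helper: sum tensor sizes via numel() * element_size()
  # instead of branching on each supported dtype.
  total = 0
  with safe_open(path, framework="pt") as f:
    for name in f.keys():
      t = f.get_tensor(name)
      total += t.numel() * t.element_size()
  return total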
- """ - start_layer = 3 - end_layer = 5 - - # Create a Shard object - shard = Shard( - model_id="meta-llama/Llama-3.2-1B-Instruct", - start_layer=start_layer, - end_layer=end_layer-1, - n_layers=32 - ) - - print(f"Loading shard: {shard}") - shard_downloader = HFShardDownloader() - - # Ensure shard is downloaded - model_path = await shard_downloader.ensure_shard(shard) - - # Collect all safetensor files from the model path - safetensor_files = [ - os.path.join(model_path, file_name) - for file_name in os.listdir(model_path) if file_name.endswith(".safetensors") - ] - - # Create the index file - if safetensor_files: - create_safetensor_index(safetensor_files, os.path.join(model_path, "model.safetensors.index.json")) - else: - print("No safetensor files found in the model path.") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/exo/inference/torch/tests/test_safetensor_shard.py b/exo/inference/torch/tests/test_safetensor_shard.py deleted file mode 100644 index dd84ff18d..000000000 --- a/exo/inference/torch/tests/test_safetensor_shard.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Sharding safetensor -""" - -import asyncio - -from exo.inference.shard import Shard -from exo.inference.torch.models.hf_safe_tensor_shard import HFSafeTensorShard -from exo.download.hf.hf_shard_download import HFShardDownloader -from exo.download.hf.hf_helpers import get_weight_map - -from transformers import AutoModelForCausalLM, AutoTokenizer - -async def main(): - start_layer = 0 - end_layer = 1 - - # Create a Shard object - shard = Shard( - model_id="unsloth/Meta-Llama-3.1-8B-Instruct", - start_layer=start_layer, - end_layer=end_layer-1, - n_layers=32 - ) - - print(f"Loading shard: {shard}") - shard_downloader = HFShardDownloader() - - # Ensure shard is downloaded - model_path = await shard_downloader.ensure_shard(shard) - - # weight map, if any - model_wm = await get_weight_map( - repo_id=shard.model_id - ) - - tensor_shard = HFSafeTensorShard(model_path, shard) - tensor_shard.modify_safetensor() - tensor_shard.create_safetensor_index() - - # load model and test - model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path=shard.model_id, - local_files_only=True, - num_hidden_layers=shard.end_layer - shard.start_layer, - #device_map="auto", - torch_dtype="float16" - ).to("cuda") - - tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") - - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "In one simple word, what is the color of a red apple?"} - ] - - text = tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) - model_inputs = tokenizer([text], return_tensors="pt") - - print(f"model_inputs:\n{model_inputs}") - - tensor_shard.restore_backups() - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/exo/inference/torch/tests/test_simple_model.py b/exo/inference/torch/tests/test_simple_model.py deleted file mode 100644 index 5ffd30ef9..000000000 --- a/exo/inference/torch/tests/test_simple_model.py +++ /dev/null @@ -1,50 +0,0 @@ -""" -Simple model test using basic pytorch/huggingface LLM model loading, inference and generation -with logit sampling -""" -from transformers import AutoModelForCausalLM, AutoTokenizer - -def run_simple(prompt: str): - model = AutoModelForCausalLM.from_pretrained( - "Qwen/Qwen2-0.5B-Instruct", - torch_dtype="auto", - device_map="auto" - ) - - tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") - - messages = [ - 
{"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": prompt} - ] - text = tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) - model_inputs = tokenizer([text], return_tensors="pt") - - print(f"model_inputs:\n{model_inputs}") - - print(f"generation_config:\n{model.generation_config}") - - generated_ids = model.generate( - model_inputs.input_ids, - attention_mask=model_inputs.attention_mask, - max_new_tokens=512, - do_sample=True - ) - - generated_ids = [ - output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids) - ] - - response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] - - print(f"Prompt: {prompt}\n") - print(f"Response: {response}\n") - -if __name__ == "__main__": - run_simple( - "In a single word only, what is the last name of the current president of the USA?" - ) diff --git a/exo/inference/torch/tests/utils.py b/exo/inference/torch/tests/utils.py deleted file mode 100644 index e4062da96..000000000 --- a/exo/inference/torch/tests/utils.py +++ /dev/null @@ -1,185 +0,0 @@ -import torch -from torch.nn import functional as F - -def top_k_sampling(logits, thres): - num_logits = logits.shape[-1] - val, ind = torch.topk(logits, thres, dim=-1, largest=True, sorted=True) - mask = torch.zeros_like(logits) - mask.scatter_(-1, ind, 1) - logits = logits * mask - - return logits - -def top_p_sampling(logits, thres): - sorted_logits, sorted_indices = torch.sort(logits, descending=True) - print(f"top_p_sampling sorted_logits\n{sorted_logits}\nsorted_indices {sorted_indices}") - softmax_logits = F.softmax(sorted_logits, dim=-1) - print(f"top_p_sampling\nsoftmax_logits {softmax_logits}") - cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) - print(f"top_p_sampling\n{cumulative_probs}") - - - # Remove tokens with cumulative probability above the threshold - sorted_indices_to_remove = cumulative_probs > thres - - # Shift the indices to the right to keep also the first token above the threshold - sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() - sorted_indices_to_remove[..., 0] = 0 - - # scatter sorted tensors to original indexing - indices_to_remove = sorted_indices_to_remove.scatter(dim=-1, index=sorted_indices, src=sorted_indices_to_remove) - print(f"top_p_sampling\nindicies_to_remove: {indices_to_remove}") - logits[indices_to_remove] = float('-inf') - return logits - -def sample_logits(logits, temp, top_p, top_k): - """ - Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. - - Args: - logits (torch.Tensor): The logits distribution to sample from. - temp (float): temp for scaling logits. - top_p (float): The cumulative probability threshold for nucleus sampling. - - Returns: - torch.Tensor: The selected token index. 
- """ - # If temp is very low, just use argmax - if temp == 0: - return logits.argmax(dim=-1) - - print(f"logits {logits}") - - scaled_logits = logits/temp - - print(f"scaled_logits: {scaled_logits}") - - if 0 < top_p < 1.0: - top_p_logits = top_p_sampling(scaled_logits, top_p) - print(f"top_p logits {top_p_logits}") - if top_k > 0: - top_k_logits = top_k_sampling(top_p_logits, top_k) - return top_k_logits.argmax(dim=-1) - elif top_k > 0: - top_k_logits = top_k_sampling(logits, top_k) - print(f"top_k logits {top_k_logits}") - return top_k_logits.argmax(dim=-1) - - return scaled_logits.argmax(dim=-1) - - -# from tinygrad llama model sample -def sample(logits: torch.Tensor, temp: float, k: int, p: float, af: float, ap: float): - assert logits.ndim == 1, "only works on 1D tensors" - assert 0 <= p <= 1, "p must be between 0 and 1" - assert 0 <= k <= logits.numel(), "k must be between 0 and numel" - - # If temperature is very low, just use argmax - if temp < 1e-6: - return logits.argmax().reshape(1) - - # Alpha sampling - if af or ap: - if not hasattr(sample, "alpha_counter"): - sample.alpha_counter = torch.zeros_like(logits, dtype=torch.int32).contiguous() - logits = logits - (sample.alpha_counter * af + (sample.alpha_counter > 0).float() * ap) - - # Replace NaNs with -inf - logits = torch.where(logits != logits, torch.tensor(-float("inf"), device=logits.device), logits) - - # Apply softmax after temperature scaling - t = F.softmax(logits / temp, dim=-1) - - counter = torch.arange(t.numel(), device=logits.device).contiguous() - counter2 = torch.arange(t.numel() - 1, -1, -1, device=logits.device).contiguous() - - # Top-k sampling - if k: - output = torch.zeros(k, device=logits.device).contiguous() - output_indices = torch.zeros(k, device=logits.device, dtype=torch.int32).contiguous() - - for i in range(k): - t_max = t.max() - t_argmax = (t.numel() - ((t == t_max) * counter2).max() - 1).to(torch.int) - output[i] = t_max - output_indices[i] = t_argmax - t = torch.where(counter == t_argmax, torch.tensor(0.0, device=logits.device), t) - - # Approximate top-p sampling - output_cumsum = output.flip(dims=(0,)).cumsum(dim=0).flip(dims=(0,)) + t.sum() - mask = output_cumsum >= (1 - p) - output = output * mask.float() - output_indices = output_indices * mask.int() - - # Sample from the distribution - output_idx = output.multinomial(num_samples=1) - output_token = output_indices[output_idx] - else: - output_token = t.multinomial(num_samples=1) - - # Increase alpha counter - if af or ap: - sample.alpha_counter = torch.where(counter == output_token, sample.alpha_counter + 1, sample.alpha_counter) - - return output_token - - -def sample_3d(logits: torch.Tensor, temp: float, k: int, p: float, af: float, ap: float): - assert logits.ndim == 3, "only works on 3D tensors" - assert 0 <= p <= 1, "p must be between 0 and 1" - assert 0 <= k <= logits.shape[-1], "k must be between 0 and the last dimension size" - - batch_size, seq_len, vocab_size = logits.shape - - # If temperature is very low, just use argmax - if temp < 1e-6: - return logits.argmax(dim=-1) - - # Alpha sampling - if af or ap: - if not hasattr(sample, "alpha_counter"): - sample.alpha_counter = torch.zeros_like(logits, dtype=torch.int32).contiguous() - logits = logits - (sample.alpha_counter * af + (sample.alpha_counter > 0).float() * ap) - - # Replace NaNs with -inf - logits = torch.where(logits != logits, torch.tensor(-float("inf"), device=logits.device), logits) - - # Apply softmax after temperature scaling - t = F.softmax(logits / temp, dim=-1) - - 
counter = torch.arange(vocab_size, device=logits.device).unsqueeze(0).unsqueeze(0).expand_as(t).contiguous() - counter2 = torch.arange(vocab_size - 1, -1, -1, device=logits.device).unsqueeze(0).unsqueeze(0).expand_as(t).contiguous() - - # Top-k sampling - if k: - output = torch.zeros((batch_size, seq_len, k), device=logits.device).contiguous() - output_indices = torch.zeros((batch_size, seq_len, k), device=logits.device, dtype=torch.int32).contiguous() - - for i in range(k): - t_max, _ = t.max(dim=-1, keepdim=True) - t_argmax = (vocab_size - ((t == t_max) * counter2).max(dim=-1, keepdim=True)[0] - 1).to(torch.int) - output[:, :, i] = t_max.squeeze(-1) - output_indices[:, :, i] = t_argmax.squeeze(-1) - t = torch.where(counter == t_argmax, torch.tensor(0.0, device=logits.device), t) - - # Approximate top-p sampling - output_cumsum = output.flip(dims=(-1,)).cumsum(dim=-1).flip(dims=(-1,)) + t.sum(dim=-1, keepdim=True) - mask = output_cumsum >= (1 - p) - output = output * mask.float() - output_indices = output_indices * mask.int() - - # Sample from the distribution - output_flat = output.view(batch_size * seq_len, -1) - output_idx = output_flat.multinomial(num_samples=1).squeeze(-1) - output_indices_flat = output_indices.view(batch_size * seq_len, -1) - output_token = output_indices_flat.gather(dim=-1, index=output_idx.unsqueeze(-1)).view(batch_size, seq_len) - else: - output_flat = t.view(batch_size * seq_len, -1) - output_token = output_flat.multinomial(num_samples=1).view(batch_size, seq_len) - - # Increase alpha counter - if af or ap: - sample.alpha_counter = torch.where(counter == output_token.unsqueeze(-1), sample.alpha_counter + 1, sample.alpha_counter) - - return output_token - diff --git a/exo/inference/torch/utils.py b/exo/inference/torch/utils.py deleted file mode 100644 index b9c4f1481..000000000 --- a/exo/inference/torch/utils.py +++ /dev/null @@ -1,60 +0,0 @@ -""" -Utility functions to be used by inference engine -and model -""" -import re - -from exo.inference.shard import Shard - -import torch - -def extract_layers( - weight_map: dict, - shard: Shard -) -> dict: - """ - Extract layers from weight map in range - - Args: - - Returns: - """ - - layer_rgx = r'^model\.layers\.(\d+)\.*' - layer_weight_map = {} - non_layer_weights = [] - - for wname, wtensor in weight_map.items(): - layer_found = re.findall(layer_rgx, wname) - if layer_found: - layer_idx = int(layer_found[0]) - if shard.start_layer <= layer_idx <= shard.end_layer: - layer_weight_map[wname] = wtensor - else: - non_layer_weights.append((wname, wtensor)) - - non_layer_weights = sorted(non_layer_weights, key=lambda x: x[1]) - - if shard.is_first_layer(): - # this assumes at max only one first weight non-layer for model - first_weight = non_layer_weights[0] - layer_weight_map[first_weight[0]] = first_weight[1] - elif shard.is_last_layer(): - last_weights = non_layer_weights[1:] - for last_weight in last_weights: - layer_weight_map[last_weight[0]] = last_weight[1] - - return layer_weight_map - -def print_cuda_vram_stats(): - """ - Prints CUDA VRAM stats being used by pytorch - """ - allocated_memory = torch.cuda.memory_allocated() - max_memory = torch.cuda.max_memory_allocated() - cached_memory = torch.cuda.memory_reserved() - - print("CUDA stats") - print(f'Allocated memory: {allocated_memory / 1024**2} MB') - print(f'Max allocated memory: {max_memory / 1024**2} MB') - print(f'Cached memory: {cached_memory / 1024**2} MB') From 30651ead42738d07f04d6146bcfdb1a8fa9ebca6 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 
30 Nov 2024 17:35:24 -0900 Subject: [PATCH 520/589] having a check for llama or Llama for loading tensors, will add support for more models like Qwen as building Qwen engine, updated full llama test with more complex prompts --- exo/inference/torch/models/llm_utils.py | 2 +- exo/inference/torch/tests/test_llama3_full.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 6a5364e10..ff42907ef 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -94,7 +94,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # remap to work with our model remapped_state_dict = {} - if "llama" in shard.model_id: + if "llama" in shard.model_id or "Llama" in shard.model_id: for key, value in full_state_dict.items(): # load layer by shard for layer_num in range(shard.start_layer, shard.end_layer + 1): diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index e8ad9dcd9..e69da7660 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -22,10 +22,10 @@ ) -MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" -TEMP = 0.6 +MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct" +TEMP = 0.0 TOP_K = 25 -MAX_NEW_TOKENS = 2 +MAX_NEW_TOKENS = 40 def main(model, prompt: str, device: torch.device=torch.device("cpu")): # Tokenize input text @@ -111,6 +111,7 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" if __name__ == "__main__": # prompt = "hello" prompt = "In a single word only, What is the capital of france?" + # prompt = "Tell me a short 4 line haiku" # prompt = "In a single word only, what is the last name of the current president of the USA?" 
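The substring check added in llm_utils.py above matches "llama" and "Llama" explicitly; a case-insensitive comparison is an equivalent, slightly more general alternative (illustrative sketch, is_llama is a hypothetical helper, not part of exo):

def is_llama(model_id: str) -> bool:
  # Hypothetical helper: lowercasing once covers "llama", "Llama", "LLaMA", etc.
  return "llama" in model_id.lower()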
# Get the path to the model files from the Hugging Face cache From 94949499ceea211abbedf6a18b9305c0d6d1bc1b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 29 Dec 2024 01:36:34 -0900 Subject: [PATCH 521/589] adding load_checkpoint to TorchDynamicShardInferenceEngine --- exo/inference/torch/sharded_inference_engine.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 0362c505a..16c6cadec 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -199,3 +199,6 @@ async def ensure_shard(self, shard: Shard): self.sharded_model ) ) + + async def load_checkpoint(self, shard: Shard, path: str): + await self.ensure_shard(shard) From 5085adbfafef13cca2a6f50ac356bd0dca941c90 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 2 Jan 2025 04:26:59 -0500 Subject: [PATCH 522/589] fixing formatting in code, adding in logging to debug, changing full example model --- exo/inference/torch/models/llama3.py | 60 +++++--------- .../torch/sharded_inference_engine.py | 78 ++++++++----------- exo/inference/torch/tests/test_llama3_full.py | 44 ++++------- 3 files changed, 67 insertions(+), 115 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 0062d6bfb..03781e4df 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -24,7 +24,6 @@ class ShardTransformerDecoder(ttm.TransformerDecoder): Custom version of torchtune TransformerDecoder to allow for sharding of models and passing of hidden layers between shards """ - def __init__( self, *, @@ -103,7 +102,7 @@ def forward( # Determine the type of input and shape if DEBUG >= 4: print("forward called") - print(f"tokens: {tokens}") + print(f"tokens [{tokens.shape()}]: {tokens}") print(f"mask: {mask}") if tokens.ndim == 3: @@ -126,7 +125,6 @@ def forward( for i in range(self.shard.start_layer, self.shard.end_layer + 1): layer = self.layers[i] - if DEBUG >= 8: print(f"\nhidden layer in H[{i}]\n{h}") print(f"\nmask\n{mask}\ninput_pos\n{input_pos}") @@ -135,6 +133,7 @@ def forward( # Process through each transformer layer with torch.no_grad(): if self.layers[self.shard.start_layer].caches_are_enabled(): + h = layer( h, mask=mask, @@ -160,10 +159,10 @@ def forward( # Return list if hidden states are requested output = [hidden[-1], output] if hidden else output - + if DEBUG >= 4: print(f"\n\noutput {output}\n\n") - + return output @@ -194,17 +193,17 @@ def LlamaModel(config: dict, shard: Shard): head_dim=config["head_dim"], q_proj=nn.Linear( config["embed_dim"], - config["num_heads"] * config["head_dim"], + config["num_heads"]*config["head_dim"], bias=config["attn_bias"], ), k_proj=nn.Linear( config["embed_dim"], - config["num_kv_heads"] * config["head_dim"], + config["num_kv_heads"]*config["head_dim"], bias=config["attn_bias"], ), v_proj=nn.Linear( config["embed_dim"], - config["num_kv_heads"] * config["head_dim"], + config["num_kv_heads"]*config["head_dim"], bias=config["attn_bias"], ), output_proj=nn.Linear( @@ -264,13 +263,7 @@ def LlamaModel(config: dict, shard: Shard): class ShardedLlamaModel(nn.Module): - def __init__( - self, - config: dict, - shard: Shard, - device: Optional[torch.device] = None, - use_cache: Optional[bool] = False - ): + def __init__(self, config: dict, shard: Shard, device: Optional[torch.device] = None, use_cache: Optional[bool] = False): super(ShardedLlamaModel, self).__init__() 
self.shard = shard @@ -281,7 +274,7 @@ def __init__( self.use_cache = use_cache # pad_id maually set as same in all llama models - self.pad_id = 128004 # from <|finetune_right_pad_id|> + self.pad_id = 128004 # from <|finetune_right_pad_id|> with torch.no_grad(): self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) @@ -290,11 +283,7 @@ def __init__( print(f"model loaded: {self.model}\n") print(f"device: {self.device}\n") - def generate( - self, - tokens: Optional[torch.Tensor] = None, - hidden_state: Optional[torch.Tensor] = None - ) -> Tuple[Optional[torch.Tensor], torch.Tensor]: + def generate(self, tokens: Optional[torch.Tensor] = None, hidden_state: Optional[torch.Tensor] = None) -> Tuple[Optional[torch.Tensor], torch.Tensor]: """ Generate logits and/or hidden_states from llama model @@ -322,11 +311,7 @@ def generate( # setup cache if not self.model.caches_are_enabled() and self.use_cache: with self.device: - self.model.setup_caches( - bsz, - self.dtype, - decoder_max_seq_len=tokens.numel() + self.max_seq_len - ) + self.model.setup_caches(bsz, self.dtype, decoder_max_seq_len=tokens.numel() + self.max_seq_len) if not self.shard.is_last_layer(): self.model.output_hidden_states = [self.shard.end_layer] @@ -345,20 +330,14 @@ def generate( input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) else: - masks = torch.tril( - torch.ones( - total_response_length, - resp_max_seq_len if resp_max_seq_len is not None else total_response_length, - dtype=torch.bool, - device=self.device, - ) - ).unsqueeze(0) - - input_pos = torch.arange( - 0, + masks = torch.tril(torch.ones( total_response_length, - device=self.device - ).unsqueeze(0) + resp_max_seq_len if resp_max_seq_len is not None else total_response_length, + dtype=torch.bool, + device=self.device, + )).unsqueeze(0) + + input_pos = torch.arange(0, total_response_length, device=self.device).unsqueeze(0) if self.model.caches_are_enabled(): curr_masks = masks[:, :tokens_length] @@ -373,10 +352,9 @@ def generate( print(f"tokens: {tokens} - {tokens.device}") print(f"mask: {curr_masks} - {curr_masks.device}") print(f"input_pos: {input_pos} - {input_pos.device}") - + if hidden_state is not None: print(f"hidden_state: {hidden_state} - {hidden_state.device}") - model_output = self.model( tokens=hidden_state if hidden_state is not None else tokens, diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 16c6cadec..3a72b50f1 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -2,14 +2,16 @@ TorchDynamicShardInferenceEngine Sharded inference engine using PyTorch based torchtune models """ + import os import functools from concurrent.futures import ThreadPoolExecutor +import asyncio import numpy as np -import asyncio import torch from torchtune.generation import sample as tt_sample +from transformers import AutoTokenizer from exo.inference.inference_engine import InferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader @@ -25,9 +27,13 @@ from exo.inference.torch.models.llama3 import ShardedLlamaModel TEMP = 0.6 -TOP_K = 25 +TOP_K = 300 + class TorchDynamicShardInferenceEngine(InferenceEngine): + """ + Pytorch based inferece engine for sharded models + """ def __init__(self, shard_downloader: HFShardDownloader): self.shard = None self.shard_downloader = shard_downloader @@ -55,11 +61,7 @@ async def encode(self, shard: Shard, prompt: str) -> np.ndarray: tokens = await 
asyncio.get_event_loop().run_in_executor( self.executor, - functools.partial( - self.tokenizer.encode, - prompt, - return_tensors="np" - ) + functools.partial(self.tokenizer.encode, prompt, return_tensors="np"), ) if DEBUG >= 4: @@ -72,16 +74,10 @@ async def decode(self, shard: Shard, tokens: np.ndarray) -> str: print("decode called") print(f"shard: {shard}") print(f"tokens: {tokens}") - + await self.ensure_shard(shard) - - return await asyncio.get_running_loop().run_in_executor( - self.executor, - functools.partial( - self.tokenizer.decode, - tokens.tolist() - ) - ) + + return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(self.tokenizer.decode, tokens.tolist())) async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: if DEBUG >= 4: @@ -91,19 +87,12 @@ async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: print(f"top_k: {top_k}") logits = torch.tensor(x).to(self.device) - def sample_wrapper(): - tokens = tt_sample( - logits, - temperature=temp, - top_k=top_k - ) + def sample_wrapper(): + tokens = tt_sample(logits, temperature=temp, top_k=top_k) return tokens.numpy(force=True) - return await asyncio.get_running_loop().run_in_executor( - self.executor, - functools.partial(sample_wrapper) - ) + return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(sample_wrapper)) async def infer_tensor( self, @@ -117,6 +106,7 @@ async def infer_tensor( print(f"shard: {shard}") print(f"input_data: {input_data}") print(f"self.past_tokens: {self.past_tokens}") + await self.ensure_shard(shard) self.request_id = request_id if not self.request_id else self.request_id @@ -139,13 +129,9 @@ def infer_wrapper(): print(f"hidden_state: {hidden_state}") if hidden_state is not None: - model_hs, model_logits = self.sharded_model.generate( - hidden_state=hidden_state - ) + model_hs, model_logits = self.sharded_model.generate(hidden_state=hidden_state) else: - model_hs, model_logits = self.sharded_model.generate( - tokens=self.past_tokens - ) + model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens) if model_hs is not None: # model_hs = model_hs.detach().cpu() @@ -165,18 +151,23 @@ async def ensure_shard(self, shard: Shard): if self.shard == shard: return - + self.shard = shard # download model safetensors and shard - model_path = await self.shard_downloader.ensure_shard( - shard, - self.__class__.__name__ - ) - model_config = load_model_config(model_path / "config.json") + model_path = await self.shard_downloader.ensure_shard(shard, self.__class__.__name__) + model_config = load_model_config(model_path/"config.json") # self.tokenizer = await _resolve_tokenizer(model_path) self.tokenizer = await _resolve_tokenizer(model_path) + eot_token = ( + self.tokenizer.special_tokens_map.get("eos_token_id") + if hasattr(self.tokenizer, "_tokenizer") and isinstance(self.tokenizer._tokenizer, AutoTokenizer) else getattr(self.tokenizer, "eos_token_id", None) + ) + + print(f"eot_token: {eot_token}") + print(self.tokenizer.special_tokens_map) + print(self.tokenizer.eos_token_id) self.sharded_model = await asyncio.get_running_loop().run_in_executor( self.executor, @@ -185,19 +176,14 @@ async def ensure_shard(self, shard: Shard): config=model_config, shard=shard, device=self.device, - use_cache=True - ) + use_cache=True, + ), ) # load sharded weights await asyncio.get_running_loop().run_in_executor( self.executor, - functools.partial( - load_model_weights_torchtune, - model_path, - shard, - 
self.sharded_model - ) + functools.partial(load_model_weights_torchtune, model_path, shard, self.sharded_model), ) async def load_checkpoint(self, shard: Shard, path: str): diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index e69da7660..56ac2eb89 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -21,13 +21,13 @@ load_model_weights_torchtune, ) +MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct" +TEMP = 0.6 +TOP_K = 35 +MAX_NEW_TOKENS = 300 -MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct" -TEMP = 0.0 -TOP_K = 25 -MAX_NEW_TOKENS = 40 -def main(model, prompt: str, device: torch.device=torch.device("cpu")): +def main(model, prompt: str, device: torch.device = torch.device("cpu")): # Tokenize input text # messages = [] # messages.extend([ @@ -36,16 +36,9 @@ def main(model, prompt: str, device: torch.device=torch.device("cpu")): # # Empty assistant message to kick-start generation # Message(role="assistant", content=""), # ]) - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": prompt} - ] - - text = llama_tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) + messages = [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": prompt}] + + text = llama_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) tok_out = llama_tokenizer([text], return_tensors="pt") tokens = tok_out.input_ids.to(device=device) @@ -72,11 +65,11 @@ def main(model, prompt: str, device: torch.device=torch.device("cpu")): print(f"generated_tokens: {generated_tokens}") tokens = generated_tokens.clone() - + print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens.tolist()[0])}\n\n\n") -def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu")): +def normal_full(model, user_prompt: str, device: torch.device = torch.device("cpu")): # Tokenize input text messages = [] messages.extend([ @@ -91,7 +84,6 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" print(f"tokens prompt: {prompt}") print(f"pad_id: {llama_tokenizer.pad_id}") - generated_tokens, _ = ttg.generate( model=model.model, prompt=prompt, @@ -108,18 +100,19 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens[0])}\n\n\n") + if __name__ == "__main__": # prompt = "hello" - prompt = "In a single word only, What is the capital of france?" + # prompt = "What is the meaning of exo?" # prompt = "Tell me a short 4 line haiku" - # prompt = "In a single word only, what is the last name of the current president of the USA?" + prompt = "In a single word only, what is the last name of the current president of the USA?" 
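Throughout TorchDynamicShardInferenceEngine above, blocking tokenizer and model calls are pushed onto a single-worker ThreadPoolExecutor so the asyncio event loop stays responsive. The general pattern, reduced to a minimal sketch (run_blocking is a hypothetical name, not part of exo):

import asyncio
import functools
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=1)

async def run_blocking(fn, *args, **kwargs):
  # Offload a synchronous call (tokenization, forward pass, sampling)
  # to the worker thread and await its result without blocking the loop.
  loop = asyncio.get_running_loop()
  return await loop.run_in_executor(executor, functools.partial(fn, *args, **kwargs))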
# Get the path to the model files from the Hugging Face cache cache_dir = Path(snapshot_download(MODEL_NAME)) print(f"Cache directory: {cache_dir}") # Load model configuration - config = load_model_config(cache_dir / "config.json") + config = load_model_config(cache_dir/"config.json") print(f"current config\n{config}") @@ -128,7 +121,7 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" shard_1 = Shard( model_id=MODEL_NAME, start_layer=0, - end_layer=n_layers-1, + end_layer=n_layers - 1, n_layers=n_layers, ) @@ -139,12 +132,7 @@ def normal_full(model, user_prompt: str, device: torch.device=torch.device("cpu" # Initialize LlamaModel with config and tokenizer device = torch.device("cuda") - shard_model_1 = ShardedLlamaModel( - config=config, - shard=shard_1, - device=device, - use_cache=True - ) + shard_model_1 = ShardedLlamaModel(config=config, shard=shard_1, device=device, use_cache=True) print(f"\nshard_model_1: {shard_model_1}") load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) From 73b71d5a368f8d56d06a3e3b61e90b4e9eada1d6 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 15 Jan 2025 18:07:54 -0900 Subject: [PATCH 523/589] Updating model to use max_position_embeddings, testing mono logit passing, added cache clearing --- exo/inference/torch/models/llama3.py | 44 ++++++++++++++++--- exo/inference/torch/models/llm_utils.py | 27 +++++------- .../torch/sharded_inference_engine.py | 12 ++++- exo/inference/torch/tests/test_llama3_full.py | 8 ++-- 4 files changed, 63 insertions(+), 28 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 03781e4df..c5a686577 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -92,6 +92,26 @@ def caches_are_enabled(self) -> bool: return False + def reset_caches(self): + for layer in self.layers: + if layer is not None: + layer.reset_cache() + + def check_maxed_cache(self, tokens: torch.Tensor) -> bool: + """ + Check if cached is maxed out and needs to be reset + """ + active_layers = [x for x in self.layers if x is not None] + kv_cache = active_layers[0].attn.kv_cache + current_pos = kv_cache.cache_pos[0] + tokens.numel() + self.max_seq_len + k_shape = kv_cache.k_cache.shape[2] + print(f"current_pos: {current_pos}\nk_shape: {k_shape}") + if current_pos <= k_shape: + print("====== MAX CACHE REACHED CLEAR ==============") + return True + + return False + def forward( self, tokens: torch.Tensor, @@ -102,7 +122,7 @@ def forward( # Determine the type of input and shape if DEBUG >= 4: print("forward called") - print(f"tokens [{tokens.shape()}]: {tokens}") + print(f"tokens [{tokens.shape}]: {tokens}") print(f"mask: {mask}") if tokens.ndim == 3: @@ -132,13 +152,23 @@ def forward( # Process through each transformer layer with torch.no_grad(): - if self.layers[self.shard.start_layer].caches_are_enabled(): + if layer.caches_are_enabled(): + try: + h = layer( + h, + mask=mask, + input_pos=input_pos, + ) + except AssertionError: + # assume due to cache + self.reset_caches() + + h = layer( + h, + mask=mask, + input_pos=input_pos, + ) - h = layer( - h, - mask=mask, - input_pos=input_pos, - ) else: h = layer(h) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index ff42907ef..69beeeffc 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -47,8 +47,8 @@ def load_model_config(model_config_path: Path) -> dict: "hidden_act": 
base_config.get("hidden_act", "silu") } - if model_config.get("rope_scaling", None) is not None: - model_config["max_seq_len"] = model_config["rope_scaling"]["original_max_position_embeddings"] + # if model_config.get("rope_scaling", None) is not None: + # model_config["max_seq_len"] = model_config["rope_scaling"]["original_max_position_embeddings"] return model_config @@ -81,7 +81,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): raise FileNotFoundError("No safetensors files found in the cache directory.") # Load weights from each found safetensors file - + full_state_dict = {} for safetensor_file in safetensors_files: state_dict = load_safetensors(safetensor_file) @@ -150,7 +150,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): print("\nRemapped state dict\n") for rsdk in remapped_state_dict.keys(): print(f"-- {rsdk}") - + # load new weight map model.load_state_dict(remapped_state_dict, strict=False) @@ -159,6 +159,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): print(f"\nremapped_state_dict: {remapped_state_dict.keys()}\n") check_weights(model, remapped_state_dict) + class MultiLayerPreceptron(nn.Module): def __init__(self, input_dim, hidden_dim, activation="silu", use_bias=False): """ @@ -174,19 +175,11 @@ def __init__(self, input_dim, hidden_dim, activation="silu", use_bias=False): super(MultiLayerPreceptron, self).__init__() # Activation function mapping - activations = { - "relu": nn.ReLU(), - "gelu": nn.GELU(), - "tanh": nn.Tanh(), - "sigmoid": nn.Sigmoid(), - "leaky_relu": nn.LeakyReLU(0.2), - "silu": nn.SiLU() - } + activations = {"relu": nn.ReLU(), "gelu": nn.GELU(), "tanh": nn.Tanh(), "sigmoid": nn.Sigmoid(), "leaky_relu": nn.LeakyReLU(0.2), "silu": nn.SiLU()} # Ensure valid activation if activation not in activations: - raise ValueError( - f"Invalid activation: {activation}. Choose from {list(activations.keys())}") + raise ValueError(f"Invalid activation: {activation}. 
Choose from {list(activations.keys())}") # Construct MLP layers self.gate_proj = nn.Linear(input_dim, hidden_dim, bias=use_bias) @@ -195,7 +188,7 @@ def __init__(self, input_dim, hidden_dim, activation="silu", use_bias=False): self.act_fn = activations[activation] def forward(self, x) -> torch.Tensor: - return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x)) + return self.down_proj(self.act_fn(self.gate_proj(x))*self.up_proj(x)) class RMSNorm(nn.Module): @@ -212,5 +205,5 @@ def forward(self, hidden_states): input_dtype = hidden_states.dtype hidden_states = hidden_states.to(torch.float32) variance = hidden_states.pow(2).mean(-1, keepdim=True) - hidden_states = hidden_states * torch.rsqrt(variance + self.eps) - return self.weight * hidden_states.to(input_dtype) + hidden_states = hidden_states*torch.rsqrt(variance + self.eps) + return self.weight*hidden_states.to(input_dtype) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 3a72b50f1..84605ff61 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -7,6 +7,7 @@ import functools from concurrent.futures import ThreadPoolExecutor import asyncio +import uuid import numpy as np import torch @@ -27,7 +28,7 @@ from exo.inference.torch.models.llama3 import ShardedLlamaModel TEMP = 0.6 -TOP_K = 300 +TOP_K = 35 class TorchDynamicShardInferenceEngine(InferenceEngine): @@ -40,6 +41,7 @@ def __init__(self, shard_downloader: HFShardDownloader): self.request_id = None self.executor = ThreadPoolExecutor(max_workers=1) self.past_tokens = None + self.uuid = str(uuid.uuid4()) # device settings if os.environ.get("TORCH_DEVICE"): @@ -77,6 +79,9 @@ async def decode(self, shard: Shard, tokens: np.ndarray) -> str: await self.ensure_shard(shard) + self.sharded_model.model.reset_caches() + self.past_tokens = None + return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(self.tokenizer.decode, tokens.tolist())) async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: @@ -132,9 +137,13 @@ def infer_wrapper(): model_hs, model_logits = self.sharded_model.generate(hidden_state=hidden_state) else: model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens) + # model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor) if model_hs is not None: # model_hs = model_hs.detach().cpu() + + # possibly make this into a tensor that has past_tokens also + # to pass to node, currently only hidden state is return model_hs.numpy(force=True) # model_logits = model_logits.detach().cpu() @@ -148,6 +157,7 @@ async def ensure_shard(self, shard: Shard): print("shard ensured\n") print(f"shard: {shard}") print(f"class shard: {self.shard}") + print(f"uuid: {self.uuid}") if self.shard == shard: return diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index 56ac2eb89..1a44ccd87 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -22,7 +22,8 @@ ) MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct" -TEMP = 0.6 +# MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" +TEMP = 0.8 TOP_K = 35 MAX_NEW_TOKENS = 300 @@ -104,8 +105,8 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp if __name__ == "__main__": # prompt = "hello" # prompt = "What is the meaning of exo?" 
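The cache-aware generate path above builds its attention mask and position ids from a padding mask via torchtune's get_causal_mask_from_padding_mask and get_position_ids_from_padding_mask. Conceptually that amounts to roughly the following (rough sketch, not the torchtune implementation; causal_mask_and_positions is a hypothetical helper):

import torch

def causal_mask_and_positions(tokens: torch.Tensor, pad_id: int):
  # tokens: (bsz, seq_len). True entries in the mask mean "may attend".
  bsz, seq_len = tokens.shape
  padding_mask = tokens != pad_id
  causal = torch.tril(torch.ones(seq_len, seq_len, dtype=torch.bool, device=tokens.device))
  mask = causal.unsqueeze(0) & padding_mask[:, None, :]       # block attention to padded keys
  input_pos = (padding_mask.cumsum(dim=-1) - 1).clamp(min=0)  # positions skip left padding
  return mask, input_pos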
- # prompt = "Tell me a short 4 line haiku" - prompt = "In a single word only, what is the last name of the current president of the USA?" + prompt = "Tell me a short 4 line haiku" + # prompt = "In a single word only, what is the last name of the current president of the USA?" # Get the path to the model files from the Hugging Face cache cache_dir = Path(snapshot_download(MODEL_NAME)) @@ -132,6 +133,7 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp # Initialize LlamaModel with config and tokenizer device = torch.device("cuda") + # device = torch.device("cpu") shard_model_1 = ShardedLlamaModel(config=config, shard=shard_1, device=device, use_cache=True) print(f"\nshard_model_1: {shard_model_1}") From 1de87fb42d7c5bedd6b260eadbcd04ecae56a551 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 16 Jan 2025 05:20:57 -0900 Subject: [PATCH 524/589] updating llama model caching inference, updating to latest torchtune and torch, updating infer_tensor to do mono logit or historical logits depending on cache or not, updating tests, updating config --- exo/inference/torch/models/llama3.py | 173 ++++++++++-------- exo/inference/torch/models/llm_utils.py | 13 +- .../torch/sharded_inference_engine.py | 5 +- exo/inference/torch/tests/test_llama3_full.py | 69 ++++--- setup.py | 10 +- 5 files changed, 149 insertions(+), 121 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index c5a686577..25e5ac5b1 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -10,7 +10,7 @@ import torch.nn as nn import torchtune.modules as ttm import torchtune.generation as ttg -from torchtune.models.llama3_1 import Llama3ScaledRoPE +from torchtune.models.llama3_1._position_embeddings import Llama3ScaledRoPE from torchtune.modules.attention_utils import _MaskType from exo.inference.shard import Shard @@ -105,9 +105,14 @@ def check_maxed_cache(self, tokens: torch.Tensor) -> bool: kv_cache = active_layers[0].attn.kv_cache current_pos = kv_cache.cache_pos[0] + tokens.numel() + self.max_seq_len k_shape = kv_cache.k_cache.shape[2] - print(f"current_pos: {current_pos}\nk_shape: {k_shape}") + + if DEBUG >= 4: + print(f"cache current_pos: {current_pos}\nk_shape: {k_shape}") + if current_pos <= k_shape: - print("====== MAX CACHE REACHED CLEAR ==============") + if DEBUG >= 4: + print("============ MAX CACHE REACHED CLEAR ==============") + return True return False @@ -124,13 +129,14 @@ def forward( print("forward called") print(f"tokens [{tokens.shape}]: {tokens}") print(f"mask: {mask}") + print(f"input_pos: {input_pos}") if tokens.ndim == 3: h = tokens # Use directly as hidden states else: - h = self.tok_embeddings(tokens) # Apply token tok_embeddings + seq_len = tokens.shape[1] - seq_len = h.shape[1] + h = self.tok_embeddings(tokens) # Apply token tok_embeddings self._validate_inputs( seq_len, @@ -151,26 +157,26 @@ def forward( print(f"\noutput_hidden_states\n{self.output_hidden_states}\n") # Process through each transformer layer - with torch.no_grad(): - if layer.caches_are_enabled(): - try: - h = layer( - h, - mask=mask, - input_pos=input_pos, - ) - except AssertionError: - # assume due to cache - self.reset_caches() - - h = layer( - h, - mask=mask, - input_pos=input_pos, - ) - - else: - h = layer(h) + # with torch.no_grad(): + if layer.caches_are_enabled(): + try: + h = layer( + h, + mask=mask, + input_pos=input_pos, + ) + except AssertionError: + # assume due to cache + self.reset_caches() + + h = 
layer( + h, + mask=mask, + input_pos=input_pos, + ) + + else: + h = layer(h) if i in self.output_hidden_states: hidden.append(h) @@ -182,10 +188,7 @@ def forward( h = self.norm(h) # Handle chunked output if needed - if self.num_output_chunks > 0: - output = self.chunked_output(h) - else: - output = self.output(h).float() + output = self.output(h).float() # Return list if hidden states are requested output = [hidden[-1], output] if hidden else output @@ -215,6 +218,8 @@ def LlamaModel(config: dict, shard: Shard): # hack to align sharded weights with layers # fill unused layer positions with None layers = [None for _ in range(shard.n_layers)] + + # build layers for i in range(shard.start_layer, shard.end_layer + 1): self_attn = ttm.MultiHeadAttention( embed_dim=config["embed_dim"], @@ -257,16 +262,9 @@ def LlamaModel(config: dict, shard: Shard): layers[i] = layer - #for i in range(len(layers)): - # print(f"layers[{i}]: {layers[i]}") layers = nn.ModuleList(layers) tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) output_proj = ttm.TiedLinear(tok_embeddings) - # output_proj = nn.Linear( - # config["embed_dim"], - # config["vocab_size"], - # bias=config["attn_bias"], - # ) return ShardTransformerDecoder( tok_embeddings=tok_embeddings, @@ -280,20 +278,16 @@ def LlamaModel(config: dict, shard: Shard): num_layers=config["num_layers"], ) - # return ttm.TransformerDecoder( - # tok_embeddings=tok_embeddings, - # layers=layers, - # max_seq_len=config["max_seq_len"], - # num_heads=config["num_heads"], - # head_dim=config["head_dim"], - # norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), - # output=output_proj, - # num_layers=config["num_layers"], - # ) - class ShardedLlamaModel(nn.Module): - def __init__(self, config: dict, shard: Shard, device: Optional[torch.device] = None, use_cache: Optional[bool] = False): + def __init__( + self, + config: dict, + shard: Shard, + device: Optional[torch.device] = None, + use_cache: Optional[bool] = False, + max_generated_tokens: int = 300, + ): super(ShardedLlamaModel, self).__init__() self.shard = shard @@ -306,14 +300,21 @@ def __init__(self, config: dict, shard: Shard, device: Optional[torch.device] = # pad_id maually set as same in all llama models self.pad_id = 128004 # from <|finetune_right_pad_id|> - with torch.no_grad(): - self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) + self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) - if DEBUG >= 8: - print(f"model loaded: {self.model}\n") - print(f"device: {self.device}\n") + # keep track of current position in generation + self.max_generated_tokens = max_generated_tokens + self.curr_pos = 0 + self.masks = None + self.curr_masks = None + self.input_pos = None + self.curr_input_pos = None - def generate(self, tokens: Optional[torch.Tensor] = None, hidden_state: Optional[torch.Tensor] = None) -> Tuple[Optional[torch.Tensor], torch.Tensor]: + def generate( + self, + tokens: Optional[torch.Tensor] = None, + hidden_state: Optional[torch.Tensor] = None, + ) -> Tuple[Optional[torch.Tensor], torch.Tensor]: """ Generate logits and/or hidden_states from llama model @@ -325,28 +326,34 @@ def generate(self, tokens: Optional[torch.Tensor] = None, hidden_state: Optional print("generate called") print(f"tokens: {tokens}") print(f"hidden_state: {hidden_state}") + print(f"curr_pos: {self.curr_pos}") - curr_masks = None - input_pos = None + bsz, tokens_length = tokens.size() - if tokens is not None: - if tokens.ndim == 1: - tokens = 
tokens.view(1, -1).to(device=self.device) + if tokens_length > 1: + tokens = tokens.view(1, -1).to(device=self.device) if tokens.ndim == 1 else tokens - bsz, tokens_length = tokens.size() + self.curr_pos = tokens_length # using self.max_seq_len will take up alot of VRAM - total_response_length = tokens_length + self.max_seq_len + total_response_length = tokens_length + self.max_generated_tokens # setup cache if not self.model.caches_are_enabled() and self.use_cache: with self.device: - self.model.setup_caches(bsz, self.dtype, decoder_max_seq_len=tokens.numel() + self.max_seq_len) + self.model.setup_caches( + bsz, + self.dtype, + decoder_max_seq_len=tokens.numel() + self.max_generated_tokens, + ) if not self.shard.is_last_layer(): self.model.output_hidden_states = [self.shard.end_layer] - resp_max_seq_len = total_response_length if not self.model.caches_are_enabled() else self.model.decoder_max_cache_seq_len + if not self.model.caches_are_enabled(): + max_seq_len = total_response_length + else: + max_seq_len = self.model.decoder_max_cache_seq_len # clone tokens generated_tokens = tokens.clone().to(device=self.device) @@ -354,44 +361,52 @@ def generate(self, tokens: Optional[torch.Tensor] = None, hidden_state: Optional # masking for proper attention padding_masks = generated_tokens != self.pad_id if not padding_masks.all(): - padding_masks = torch.nn.functional.pad(padding_masks, (0, self.max_seq_len), value=True) + padding_masks = torch.nn.functional.pad(padding_masks, (0, self.max_generated_tokens), value=True) - masks = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=resp_max_seq_len) + self.masks = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=max_seq_len) - input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) + self.input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) else: - masks = torch.tril(torch.ones( + self.masks = torch.tril(torch.ones( total_response_length, - resp_max_seq_len if resp_max_seq_len is not None else total_response_length, + max_seq_len, dtype=torch.bool, device=self.device, )).unsqueeze(0) - input_pos = torch.arange(0, total_response_length, device=self.device).unsqueeze(0) + self.input_pos = torch.arange(0, total_response_length, device=self.device).unsqueeze(0) if self.model.caches_are_enabled(): - curr_masks = masks[:, :tokens_length] + self.curr_masks = self.masks[:, :tokens_length] else: - curr_masks = masks[:, :tokens_length, :tokens_length] + self.curr_masks = self.masks[:, :tokens_length, :tokens_length] - input_pos = input_pos[:, :tokens_length].squeeze() + self.curr_input_pos = self.input_pos[:, :tokens_length].squeeze() + else: + if self.model.caches_are_enabled(): + self.curr_input_pos = self.input_pos[:, self.curr_pos].contiguous() + self.curr_masks = self.masks[:, self.curr_pos, None, :].contiguous() + else: + self.curr_input_pos = self.input_pos[:, :self.curr_pos + 1] + self.curr_masks = self.masks[:, :self.curr_pos + 1, :self.curr_pos + 1] if DEBUG >= 4: print("model_input") if tokens is not None: - print(f"tokens: {tokens} - {tokens.device}") - print(f"mask: {curr_masks} - {curr_masks.device}") - print(f"input_pos: {input_pos} - {input_pos.device}") - + print(f"tokens: {tokens}") if hidden_state is not None: - print(f"hidden_state: {hidden_state} - {hidden_state.device}") + print(f"hidden_state: {hidden_state}") + print(f"mask: {self.curr_masks}") + print(f"input_pos: {self.curr_input_pos}") model_output = self.model( tokens=hidden_state if hidden_state is not None else tokens, - 
mask=curr_masks, - input_pos=input_pos, + mask=self.curr_masks, + input_pos=self.curr_input_pos, ) + self.curr_pos += 1 + if DEBUG >= 4: print(f"model_output\n{model_output}") diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 69beeeffc..e5a6d7094 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -34,7 +34,10 @@ def load_model_config(model_config_path: Path) -> dict: "rope_scaling": base_config.get("rope_scaling"), "embed_dim": base_config["hidden_size"], "num_heads": base_config["num_attention_heads"], - "head_dim": base_config["hidden_size"] // base_config["num_attention_heads"], # Assuming embed_dim = hidden_size + "head_dim": base_config.get( + "head_dim", + base_config["hidden_size"] // base_config["num_attention_heads"], + ), # Assuming embed_dim = hidden_size "num_kv_heads": base_config["num_key_value_heads"], "max_seq_len": base_config["max_position_embeddings"], "intermediate_dim": base_config["intermediate_size"], @@ -154,10 +157,10 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # load new weight map model.load_state_dict(remapped_state_dict, strict=False) - if DEBUG >= 8: - print("\n--- checking weights ----\n") - print(f"\nremapped_state_dict: {remapped_state_dict.keys()}\n") - check_weights(model, remapped_state_dict) + # if DEBUG >= 8: + print("\n--- checking weights ----\n") + print(f"\nremapped_state_dict: {remapped_state_dict.keys()}\n") + check_weights(model, remapped_state_dict) class MultiLayerPreceptron(nn.Module): diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 84605ff61..2cc375817 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -136,7 +136,10 @@ def infer_wrapper(): if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate(hidden_state=hidden_state) else: - model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens) + if not self.sharded_model.model.caches_are_enabled(): + model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens) + else: + model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor) # model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor) if model_hs is not None: diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index 1a44ccd87..09aa27186 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -16,56 +16,58 @@ from exo.inference.torch.models.llama3 import ShardedLlamaModel from exo.inference.shard import Shard -from exo.inference.torch.models.llm_utils import ( - load_model_config, - load_model_weights_torchtune, -) +from exo.inference.torch.models.llm_utils import (load_model_config, load_model_weights_torchtune, check_weights) -MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct" -# MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" -TEMP = 0.8 -TOP_K = 35 -MAX_NEW_TOKENS = 300 +# MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct" +MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" +TEMP = 0.6 +TOP_K = 300 +MAX_NEW_TOKENS = 50 def main(model, prompt: str, device: torch.device = torch.device("cpu")): - # Tokenize input text - # messages = [] - # messages.extend([ - # Message(role="system", content="You are a helpful and creative AI assistant."), - # Message(role="user", content=prompt), - # # 
Empty assistant message to kick-start generation - # Message(role="assistant", content=""), - # ]) - messages = [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": prompt}] + messages = [{"role": "user", "content": prompt}] text = llama_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) tok_out = llama_tokenizer([text], return_tensors="pt") tokens = tok_out.input_ids.to(device=device) - # tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) - # print(f"tokenizer_out: {tokenizer_out}") - # tokens = torch.tensor([tokenizer_out["tokens"]], dtype=torch.int, device=device) generated_tokens = tokens.clone() print(f"tokens: {tokens}") - for i in range(MAX_NEW_TOKENS): - print(f"gen #{i}") - _, logits = model.generate(tokens=tokens) + _, logits = model.generate(tokens=tokens) - tokens = ttg.sample(logits=logits[:, -1].clone(), temperature=TEMP, top_k=TOP_K) + tokens = ttg.sample( + logits=logits[:, -1].clone(), + temperature=TEMP, + top_k=TOP_K, + ) - print(f"tokens: {tokens}") + print(f"tokens: {tokens}") + + for i in range(MAX_NEW_TOKENS - 1): + print(f"gen #{i}") if tokens.item() == llama_tokenizer.eos_token_id: print("stop token hit!") break + _, logits = model.generate(tokens=tokens) + + tokens = ttg.sample( + logits=logits[:, -1].clone(), + temperature=TEMP, + top_k=TOP_K, + ) + + print(f"tokens: {tokens}") + generated_tokens = torch.cat([generated_tokens, tokens], dim=-1) print(f"generated_tokens: {generated_tokens}") - tokens = generated_tokens.clone() + if not model.model.caches_are_enabled(): + tokens = generated_tokens.clone() print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens.tolist()[0])}\n\n\n") @@ -103,9 +105,9 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp if __name__ == "__main__": - # prompt = "hello" + prompt = "Tell me a joke." # prompt = "What is the meaning of exo?" - prompt = "Tell me a short 4 line haiku" + # prompt = "Tell me a short 4 line haiku" # prompt = "In a single word only, what is the last name of the current president of the USA?" 
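
The per-token loop in this test (generate, sample with ttg.sample, append, stop on eos) can be condensed as below. The sketch assumes a model that follows the same generate(tokens=...) -> (hidden_state, logits) contract as ShardedLlamaModel in this patch, and it mirrors the cache-enabled path where only the latest sampled token is fed back each step; decode, eos_id and the sampling settings are illustrative stand-ins:

import torch
import torchtune.generation as ttg

def decode(model, tokens: torch.Tensor, eos_id: int, max_new_tokens: int = 50) -> torch.Tensor:
  generated = tokens.clone()
  for _ in range(max_new_tokens):
    # generate(tokens=...) is assumed to return (hidden_state, logits)
    _, logits = model.generate(tokens=tokens)
    tokens = ttg.sample(logits=logits[:, -1].clone(), temperature=0.6, top_k=35)
    if tokens.item() == eos_id:
      break
    generated = torch.cat([generated, tokens], dim=-1)
  return generated
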
# Get the path to the model files from the Hugging Face cache @@ -134,7 +136,14 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp # Initialize LlamaModel with config and tokenizer device = torch.device("cuda") # device = torch.device("cpu") - shard_model_1 = ShardedLlamaModel(config=config, shard=shard_1, device=device, use_cache=True) + shard_model_1 = ShardedLlamaModel( + config=config, + shard=shard_1, + device=device, + use_cache=True, + max_generated_tokens=MAX_NEW_TOKENS, + ) + print(f"\nshard_model_1: {shard_model_1}") load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) diff --git a/setup.py b/setup.py index 29d070927..0920cfe7c 100644 --- a/setup.py +++ b/setup.py @@ -28,18 +28,16 @@ "transformers==4.46.3", "uuid==1.30", "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@3b26e51fcebfc6576f4e0f99693e6f1406d61d79", - "torch==2.4.0", + "torch==2.5.1", "accelerate==0.34.2", - "torchtune==0.4.0", + "torchtune==0.5.0", "torchao==0.6.1", "pytest==8.3.3", - "pytest-asyncio==0.24.0" + "pytest-asyncio==0.24.0", ] extras_require = { - "formatting": [ - "yapf==0.40.2", - ], + "formatting": ["yapf==0.40.2",], "apple_silicon": [ "mlx==0.20.0", "mlx-lm==0.19.3", From 028f30597a5a42c3ab34936d1e51c8196260d1f4 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 16 Jan 2025 09:32:05 -0900 Subject: [PATCH 525/589] improved weight loading, switched to torchtune based norm and llama focused mlp functions, working on using smaller max_seq_len as an option, added env var TORCH_USE_CACHE to toggle caching --- exo/inference/torch/models/llama3.py | 28 ++++++++++--- exo/inference/torch/models/llm_utils.py | 39 ++++++++++++++++--- .../torch/sharded_inference_engine.py | 15 ++++--- exo/inference/torch/tests/test_llama3_full.py | 30 +++++++++++--- 4 files changed, 88 insertions(+), 24 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 25e5ac5b1..cd87fed2d 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -10,11 +10,20 @@ import torch.nn as nn import torchtune.modules as ttm import torchtune.generation as ttg -from torchtune.models.llama3_1._position_embeddings import Llama3ScaledRoPE + from torchtune.modules.attention_utils import _MaskType +from torchtune.modules import RMSNorm +# llama3 torchtune +from torchtune.models.llama3_1._position_embeddings import Llama3ScaledRoPE +# from torchtune.models.llama3._model_utils import scale_hidden_dim_for_mlp from exo.inference.shard import Shard -from exo.inference.torch.models.llm_utils import MultiLayerPreceptron, RMSNorm +from exo.inference.torch.models.llm_utils import ( + llama3_mlp, + MultiLayerPreceptron, + # RMSNorm, +) + from exo.helpers import DEBUG @@ -251,7 +260,10 @@ def LlamaModel(config: dict, shard: Shard): pos_embeddings=rope, ) - mlp = MultiLayerPreceptron(config["embed_dim"], config["intermediate_dim"], config["hidden_act"]) + mlp = llama3_mlp( + dim=config["embed_dim"], + hidden_dim=config["intermediate_dim"], + ) layer = ttm.TransformerSelfAttentionLayer( attn=self_attn, @@ -266,6 +278,8 @@ def LlamaModel(config: dict, shard: Shard): tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) output_proj = ttm.TiedLinear(tok_embeddings) + norm = RMSNorm(config["embed_dim"], eps=config["norm_eps"]) + return ShardTransformerDecoder( tok_embeddings=tok_embeddings, shard=shard, @@ -273,7 +287,7 @@ def LlamaModel(config: dict, shard: Shard): max_seq_len=config["max_seq_len"], 
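
Switching the decoder norm to torchtune's RMSNorm, as in the hunk above, also changes the parameter name from weight to scale, which is what the weight remapping later in this patch relies on. A quick standalone check, with an arbitrary embedding size:

import torch
from torchtune.modules import RMSNorm

norm = RMSNorm(dim=16, eps=1e-5)
x = torch.randn(2, 3, 16)

print(norm(x).shape)                                   # same shape as the input
print([name for name, _ in norm.named_parameters()])   # ['scale'] -- hence the "*.scale" keys in the remap
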
num_heads=config["num_heads"], head_dim=config["head_dim"], - norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), + norm=norm, output=output_proj, num_layers=config["num_layers"], ) @@ -361,7 +375,11 @@ def generate( # masking for proper attention padding_masks = generated_tokens != self.pad_id if not padding_masks.all(): - padding_masks = torch.nn.functional.pad(padding_masks, (0, self.max_generated_tokens), value=True) + padding_masks = torch.nn.functional.pad( + padding_masks, + (0, self.max_generated_tokens), + value=True, + ) self.masks = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=max_seq_len) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index e5a6d7094..6843bbf94 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -9,6 +9,7 @@ import torch import torch.nn as nn +from torchtune.modules import FeedForward from safetensors.torch import load_file as load_safetensors @@ -50,8 +51,8 @@ def load_model_config(model_config_path: Path) -> dict: "hidden_act": base_config.get("hidden_act", "silu") } - # if model_config.get("rope_scaling", None) is not None: - # model_config["max_seq_len"] = model_config["rope_scaling"]["original_max_position_embeddings"] + if model_config.get("rope_scaling", None) is not None: + model_config["max_seq_len"] = model_config["rope_scaling"]["original_max_position_embeddings"] return model_config @@ -78,6 +79,9 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): """ Loads weights from huggingface and changes it to match torchtune naming structure """ + model_state_dict = model.state_dict() + for name, _ in model_state_dict.items(): + print(f"name {name}") # Load weights from safetensors files in the cache directory safetensors_files = list(cache_dir.glob("*.safetensors")) if not safetensors_files: @@ -104,7 +108,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # change input layer norm to sa_norm for torchtune re_iln = re.findall(rf"model.layers\.{layer_num}\.(input_layernorm)\.weight", key) if len(re_iln) != 0: - new_key = f"model.layers.{layer_num}.sa_norm.weight" + new_key = f"model.layers.{layer_num}.sa_norm.scale" remapped_state_dict[new_key] = value if DEBUG >= 8: print(f"{key} == {new_key}") @@ -112,7 +116,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # change post attention layernorm to mlp_norm for torchtune re_pal = re.findall(rf"model.layers\.{layer_num}\.(post_attention_layernorm)\.weight", key) if len(re_pal) != 0: - new_key = f"model.layers.{layer_num}.mlp_norm.weight" + new_key = f"model.layers.{layer_num}.mlp_norm.scale" remapped_state_dict[new_key] = value if DEBUG >= 8: print(f"{key} == {new_key}") @@ -133,7 +137,14 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): # set mlp weights re_mlp = re.findall(rf"model\.layers\.{layer_num}.mlp.(\w+)\.(\w+)", key) if len(re_mlp) != 0: - new_key = f"model.layers.{layer_num}.mlp.{re_mlp[0][0]}.{re_mlp[0][1]}" + proj_name = re_mlp[0][0] + if proj_name == "up_proj": + proj_name = "w3" + elif proj_name == "down_proj": + proj_name = "w2" + elif proj_name == "gate_proj": + proj_name = "w1" + new_key = f"model.layers.{layer_num}.mlp.{proj_name}.weight" remapped_state_dict[new_key] = value if DEBUG >= 8: print(f"{key} == {new_key}") @@ -143,6 +154,13 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): 
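
The renames above map Hugging Face Llama parameter names onto torchtune's layout: the MLP projections become w1/w2/w3 and the layernorm weights become *.scale norms. A toy version of that renaming over a couple of keys (remap_key and the sample keys are fabricated for illustration; the real loader works over the full safetensors state dict):

import re

HF_TO_TT_MLP = {"gate_proj": "w1", "down_proj": "w2", "up_proj": "w3"}

def remap_key(key: str) -> str:
  m = re.match(r"model\.layers\.(\d+)\.mlp\.(gate_proj|up_proj|down_proj)\.weight", key)
  if m:
    return f"model.layers.{m.group(1)}.mlp.{HF_TO_TT_MLP[m.group(2)]}.weight"
  key = key.replace("input_layernorm.weight", "sa_norm.scale")
  return key.replace("post_attention_layernorm.weight", "mlp_norm.scale")

print(remap_key("model.layers.0.mlp.up_proj.weight"))      # model.layers.0.mlp.w3.weight
print(remap_key("model.layers.0.input_layernorm.weight"))  # model.layers.0.sa_norm.scale
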
remapped_state_dict["model.tok_embeddings.weight"] = value if DEBUG >= 8: print("model.embed_tokens.weight == model.tok_embeddings.weight") + + if key == "model.norm.weight": + remapped_state_dict["model.norm.scale"] = value + + if key == "lm_head.weight": + remapped_state_dict["output.weight"] = value + else: print(f"{shard.model_id} not supported for sharding, loading weights normally") @@ -210,3 +228,14 @@ def forward(self, hidden_states): variance = hidden_states.pow(2).mean(-1, keepdim=True) hidden_states = hidden_states*torch.rsqrt(variance + self.eps) return self.weight*hidden_states.to(input_dtype) + + +def llama3_mlp(dim: int, hidden_dim: int) -> FeedForward: + """ + Build the MLP layer associated with the Llama model. + """ + gate_proj = nn.Linear(dim, hidden_dim, bias=False) + down_proj = nn.Linear(hidden_dim, dim, bias=False) + up_proj = nn.Linear(dim, hidden_dim, bias=False) + + return FeedForward(gate_proj=gate_proj, down_proj=down_proj, up_proj=up_proj) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 2cc375817..1696d2242 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -27,8 +27,8 @@ # supported models from exo.inference.torch.models.llama3 import ShardedLlamaModel -TEMP = 0.6 -TOP_K = 35 +TEMP = 0.8 +TOP_K = 10 class TorchDynamicShardInferenceEngine(InferenceEngine): @@ -79,10 +79,10 @@ async def decode(self, shard: Shard, tokens: np.ndarray) -> str: await self.ensure_shard(shard) - self.sharded_model.model.reset_caches() - self.past_tokens = None - - return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(self.tokenizer.decode, tokens.tolist())) + return await asyncio.get_running_loop().run_in_executor( + self.executor, + functools.partial(self.tokenizer.decode, tokens.tolist()), + ) async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: if DEBUG >= 4: @@ -140,7 +140,6 @@ def infer_wrapper(): model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens) else: model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor) - # model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor) if model_hs is not None: # model_hs = model_hs.detach().cpu() @@ -189,7 +188,7 @@ async def ensure_shard(self, shard: Shard): config=model_config, shard=shard, device=self.device, - use_cache=True, + use_cache=os.environ.get("TORCH_USE_CACHE", True), ), ) diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index 09aa27186..e11bd8b39 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -20,18 +20,35 @@ # MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct" MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" -TEMP = 0.6 -TOP_K = 300 -MAX_NEW_TOKENS = 50 +TEMP = 0.0 +TOP_K = 35 +MAX_NEW_TOKENS = 100 def main(model, prompt: str, device: torch.device = torch.device("cpu")): - messages = [{"role": "user", "content": prompt}] + messages = [{ + "role": "assistant", + "content": "", + }, { + "role": "user", + "content": prompt, + }] text = llama_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) tok_out = llama_tokenizer([text], return_tensors="pt") tokens = tok_out.input_ids.to(device=device) + # messages = [] + # messages.extend([ + # Message(role="system", content="You are a helpful and creative AI assistant."), + # 
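
llama3_mlp above wires torchtune's FeedForward in the usual SwiGLU arrangement, with gate_proj/down_proj/up_proj held internally as w1/w2/w3, which is what the remapped weight names line up with. A standalone sketch with small, arbitrary dimensions:

import torch
import torch.nn as nn
from torchtune.modules import FeedForward

dim, hidden_dim = 16, 64
mlp = FeedForward(
  gate_proj=nn.Linear(dim, hidden_dim, bias=False),  # held as w1
  down_proj=nn.Linear(hidden_dim, dim, bias=False),  # held as w2
  up_proj=nn.Linear(dim, hidden_dim, bias=False),    # held as w3
)

print(mlp(torch.randn(2, 3, dim)).shape)  # torch.Size([2, 3, 16])
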
Message(role="user", content=prompt), + # # Empty assistant message to kick-start generation + # Message(role="assistant", content=""), + # ]) + + # tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) + # tokens = torch.tensor([tokenizer_out["tokens"]], dtype=torch.int, device=device) + generated_tokens = tokens.clone() print(f"tokens: {tokens}") @@ -50,6 +67,7 @@ def main(model, prompt: str, device: torch.device = torch.device("cpu")): print(f"gen #{i}") if tokens.item() == llama_tokenizer.eos_token_id: + # if tokens.item() in llama_tokenizer.stop_tokens: print("stop token hit!") break @@ -105,10 +123,10 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp if __name__ == "__main__": - prompt = "Tell me a joke." + # prompt = "Tell me a joke." # prompt = "What is the meaning of exo?" # prompt = "Tell me a short 4 line haiku" - # prompt = "In a single word only, what is the last name of the current president of the USA?" + prompt = "In a single word only, what is the last name of the current president of the USA?" # Get the path to the model files from the Hugging Face cache cache_dir = Path(snapshot_download(MODEL_NAME)) From f3bd881ce7617cb6c35ec8b8201a10cbd630c5e5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 16 Jan 2025 10:12:03 -0900 Subject: [PATCH 526/589] adding TORCH_USE_ORG_SEQ to use the origin max positions embeds for max_seq_len, added clearing cuda caches to reset_caches on model --- exo/inference/torch/models/llama3.py | 2 ++ exo/inference/torch/models/llm_utils.py | 4 ++-- exo/inference/torch/sharded_inference_engine.py | 7 +++++-- exo/inference/torch/tests/test_llama3_full.py | 4 ++-- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index cd87fed2d..4f52c08d6 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -102,6 +102,8 @@ def caches_are_enabled(self) -> bool: return False def reset_caches(self): + torch.cuda.empty_cache() + for layer in self.layers: if layer is not None: layer.reset_cache() diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 6843bbf94..efd5471f0 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -1,7 +1,7 @@ """ Utility methods used by LLMs """ - +import os import re import json from pathlib import Path @@ -51,7 +51,7 @@ def load_model_config(model_config_path: Path) -> dict: "hidden_act": base_config.get("hidden_act", "silu") } - if model_config.get("rope_scaling", None) is not None: + if (os.environ.get("TORCH_USE_ORG_SEQ", True) and model_config.get("rope_scaling", None) is not None): model_config["max_seq_len"] = model_config["rope_scaling"]["original_max_position_embeddings"] return model_config diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 1696d2242..ffad807ce 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -27,8 +27,8 @@ # supported models from exo.inference.torch.models.llama3 import ShardedLlamaModel -TEMP = 0.8 -TOP_K = 10 +TEMP = 0.0 +TOP_K = 35 class TorchDynamicShardInferenceEngine(InferenceEngine): @@ -95,6 +95,9 @@ async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: def sample_wrapper(): tokens = tt_sample(logits, temperature=temp, top_k=top_k) + if DEBUG >= 4: + print(f"tokens: 
{tokens}") + return tokens.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(sample_wrapper)) diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index e11bd8b39..703b87861 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -123,10 +123,10 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp if __name__ == "__main__": - # prompt = "Tell me a joke." + prompt = "Tell me a joke." # prompt = "What is the meaning of exo?" # prompt = "Tell me a short 4 line haiku" - prompt = "In a single word only, what is the last name of the current president of the USA?" + # prompt = "In a single word only, what is the last name of the current president of the USA?" # Get the path to the model files from the Hugging Face cache cache_dir = Path(snapshot_download(MODEL_NAME)) From 027de6f65a42f66d0e7b4b5606217298f2532b72 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 16 Jan 2025 10:28:22 -0900 Subject: [PATCH 527/589] updating readme before merge into main --- 2025-01-16-102425_3840x1080_scrot.png | Bin 0 -> 688253 bytes exo/inference/torch/README.md | 37 +++++++++++++++++--------- 2 files changed, 24 insertions(+), 13 deletions(-) create mode 100644 2025-01-16-102425_3840x1080_scrot.png diff --git a/2025-01-16-102425_3840x1080_scrot.png b/2025-01-16-102425_3840x1080_scrot.png new file mode 100644 index 0000000000000000000000000000000000000000..fbe69540ced9bd3ff3f05009ecd5c0dbb96a7ec6 GIT binary patch literal 688253 zcmZU51z1#V*EJ?6AfT(jAH-As`|xT_WAmA|(ydjdbVG_3zQ= zdB69){&RWBIE-h`oO|DUuf5jV<1a5Oj(wfbXtpU^_}Q49t>hD3PhkAdh(Dgqv| z){2`KI#v$FDJv7=(`lcv*GD@`G~;-x9rWnQ8lE}1sC(nx5K`1d3I^9KBUm;ZSp@sb1$J~}!& zA|a{1fH6z{z5%SD=1 zXaD=-yGP2uYvV_czAe8i9Ow&Iy)U$GPbMNi#>2b}895DXp1K^|x|0X~ea6nR{o}#=p@Otf}bui(p+Wgo3 zo4s8K8obn8*Srif<@P-Hamjqsgm4Uxia{w64ZB~DN^{+PUdvZF5n+UsJ0YW2UL5al zbX6En)_7R*Zhxee8gb0@^z_syvyzdK;j)yLkr}UW$Vy1iu@e_XBmHv)ln=<89hQDQ zp$}KL%~ztgRI@sG7IWp_i%aB)J<^)*@YOpX<4)Q4aM(yIv1G?zVB4+MH=ZDSmiOU; zd9Ty<&c*2jO~cyzZ6=)T7%TYOV?>pmbZK;P6UQ4ab$h$65^>_y$hy7WYlFWO zStt(=<4n9(Tr+PE+_6|Y>+7?ov$5>kqO81j&Y$3koSWF4s4)<$p-R|X(+hDG=NQlz z7K}swzL{0}U~p}A7TIzyfrgHbj+WMAyX9VIXQ$qAp#v*r+;B3$`Nk1A$qTic`~ zyM5lEmA;lN0xL8u={R7!Z1mox95-Xn>Ii`pZGIB=Ji6(Ac*K4peAV)4(jqqv#^Y+t zoo&I%g2?6F{e7N^8yFZSS-N+rsRy#v^hc8A@2m68S8mj}9c=y%y(_-A+}}2eIx1E2 zwC{=s3%Va{3T#yMj`g56oc9s@r?8k9e%&8$xJhK_!q$cgW~SPkpuFzIRWg#d@41Yih9eq{zc+uUbugq~O8;m#dA$NM=l? z6!n_tzW0xo;Q^zA1m$2yRw1Io3xUdVFT*Ro*_-SdBTajg;Ws6~iLqUOn1f zq4GSqgU6&%YOy|6&c?=uGiwU@&NJ47&8O=$` z=6~^2B*XXnRIu?tf3P;*rCpg?@kv#y6JjcA!9_!3G)*e9GhRp|Q)zFpr@o`3V`9Q{ zce%ey_&7T!hpB5PCm}72nDxc?dkOA(N2Xl$tguzEF)_{8<&%<l@GT)aitg`DhcO1D)u9Gnnl ztqRUE>oTkHtJtJ{Rt(m>;}9G>KQLK}TU<;=(@MpPI_c5Ai#Ef;{%~zcPgO82s>wRK z-+{_YmVq<8Z1)+%CHaE&)gn*{F;yY+PhKlQP}ec$EUd4e9WEqfq^25_NJvVy!bL63 z%=A86K~)}aBRppx?YzMi?5Y>G&9{A}QZK2j^E*D^I#r)uvNT^fJS81=S(?i9@|_5r zZ2d7QSb3VJy&w0kAYrqvZl}d`Y4Bs0#rexU{egW-XZJa;GAe&L>9(H-r;K6&x>5-^ z?Po?kJCh5i%;)V#wfXBUQeJACgXfeootb^nI7^(ueyNjvd^JxKa>reYlr>x|7e=h} zg0ye0-C-SXZr7@CpV+iA_&HW#)G?T0tzyRYoactesg&=^chB+nmw4q|cQz)_bzB6} zX9rs}#tP2Ds4?S30w)jG&pjfDYfK8mOp&RvfhqKk3yr!gEL&XM99nc$yczZjHIMJ9 zWN`asVo?cKRa6}9bcy6;qK-pkubfJbI&FHwLVL8LynOf31Gq_NCa&h__8gg~f_uB? 
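
One more note on the TORCH_USE_CACHE and TORCH_USE_ORG_SEQ toggles read above with os.environ.get: when the variable is set, the value comes back as a string, so it is worth coercing it to a real boolean before use. A small helper sketch (env_flag is a made-up name, not part of this patch):

import os

def env_flag(name: str, default: bool = True) -> bool:
  # os.environ.get returns a *string* when the variable is set, and any
  # non-empty string (even "False" or "0") is truthy, so parse it explicitly.
  val = os.environ.get(name)
  if val is None:
    return default
  return val.strip().lower() in ("1", "true", "yes", "on")

use_cache = env_flag("TORCH_USE_CACHE", True)
use_org_seq = env_flag("TORCH_USE_ORG_SEQ", True)
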
zw2vk0!EQ5q_Y$RATTzThXIe8h@=k4pnQO)3{F0e8mKuLTMtR%0F3jyY=n49ilM%qe z4G9kpIT^Pc6aq`R+Tq&;>}g)a_JhHPZ29hkMwd!u0A#qsCF5MSyWJ-?6wqe_nb;lz zfTGR2+{ZQo0{RPfi~y0iFX9J}`X; zohuD8q;UeM(67q0+}zWSt1arDkxvB%HRbh$Sb2(m{rUym8@8)x_%}H=oP(h*7W5uf zR@py0MS)$LLf0SG|62L+bz(n_!4t`^Dz{SceLAA9lB|=gZBe~66)L2mF?S4Y&B^Q= zACnRCvF}z~m4a%PAc1_J9M@7;u0Z@H@K^-?czoHUEu|}`qM~9eH(h)wFZO2;k3taB z$nfPJC0V*w`SZ*71$E)n;=Jr`&_=v9EXIZRwWeHTIevpVZOz?zG9mOzQY}htf!n`_ z&F)-VR+P_ZH4$TVReIgB7YXc!13^)n7q`H9G807WIQfe_vIupZAoY8y1UO5yR*ajD zp7Yg%>h3!qin8@vqxzIai39x6PAjSIz<~{OF+U0wPr@9hG`%jGkn;cRCWU{y2}r$? z1z6(z>R9`XIR+G63Gj9OgbrOzJ_MS4Fw{(mVwZSvenIGkBTRC-dBWLFm4?BB^_t0W z0lc9VC4)r68T-Wz2gGKIFBY71n+PKZPu?2j z2rSrs$*s7+^3SW0qkQz36aWS{--9k{Mn>GFpkiKq;?dFVr zR}Nc$DG(ns_vm82+T*LabWmK}rPrhgXkF`MOP?Co@Cvp#`SaBQ zfykKJmGyH~6@X@_tZ<0UzIRF{qG8uEf^zU|a|d6RlO7YaW@R950Tk?aCs1w6DSUg( zf|phxH?f){+7(PW$fm2`Y_3Vu0>|dLJGi_^zIo&w=SE_FErieAFeyL&JHU{qf+_L} z6In>^&iaI=^(yqLf$gOViO!LyXe=Exue}OG>NQCwS60zUe>u5ihAn%zQXKE^w7WNr z{l<>2Ukpr1jXSN?bbz#~b*y9s&#f(AKe(MQuGQTsrlx|2-#6I2Y%8+ou~xBN^2KFf z7Gp+BI1={8$HmO7EGsCem2E*%61={VeqKr8=X$;MYJ+>rAg1~FiC2_ zxkxeT6p6zDM8pqK)6ya{CAFRVH{7E7vxe1>WgTsThduhYl z>-WLTq)HTI>7p&Pm>h!cdm>gF6eq2lMO5VghuJArnYOD1velg*9#Od4$EY-O@T+jw zEHyJYDTiC{6?VJ^HkN3|3Hx4zyciG@HE`mI7&U;kdwUq4 zwg!ECO$#M;Y^a3tpAVyncDGc7OA|QqIZl7HlmG-ScF@whZ&PaisY6{~-_i-A;YBI@ z0lV^=Dm(pbaIMb*QtgoZGk@A30V@HM$fiq5u}Bua(eIhPTek_~dobgZ3#*UjB%0P) zf3E5~`oAhm4QRX^jktCqI9?UBKy=upf@Rp>J$o`eql$|wKi@b$y1Bixm6n!PRKg&? z*X^s5nZLsBYkA*<%tk zD|pRzYuG(=qgD(p<6OL)@`|#y^V2E_jD$F3pCJP$-Lcxz-RvQle1%BzXSv<7_sN8! zg)$Jrs|`QRmqT~YLiMB=GzOH;f7va|C@TYaHK3pRRaSQHEeV_h*GEeg))E$7N66;7 z(p)dnEnyGg5Gsocm)E?RsdwI z$Zj_e4?wv?Ef(niryNnr5tr)bIqzNp-KxitA3XBTinrSP3lmb>>?Z@$sf4Ma9JiFWp=v6lfL!!&w&Rnv5|6^`X)%O5P%kYqV z*?UY+Z~;@pmJiNdlNqBAi_v?DDYVER*gC*S7|HlG9~E3o!k&s!IGp824pp1@=8{3OfPw%j{Q}Oww&G+~BU*0^s#t&|a$>Bf|Wai$?C5(`Y$;n7gcG=mA z9~*NA3FyZFZ;pw|P{9gbgP1B3f-C@GSLv@Bl?*?;KS3L+8X5*of^V;mGUZ7vDyqB~ zk>}Z`qrV56`{ZZ}OX%gnv>48Zhpaq2c*G>~X-`Wki`2B$-N!cON3G%`J6F1xs602% z=M}po!!QH{1Pb=v?b*D0?$y_JP2=sX{P`lq!715O{1xgqa#~C>5H^m5Kazx)xK^emPqv$ zO8|VmyFU-l&fNdnczA?0Im4Dfti0BbC~BOPJ=nmDUCVOg}%(^2nBpPT;n|wlaQ{Twa(G11#f8(YxZT{sa<;;u-t95Hu9Fza_?s{L z5mlCS8j8fph!f7;E9g5X77#lQTTo{y{=H!K6*-n>+Vlt2iKyxu62A>i2by8&OX>XL zw)Jz{JtoJKUpVfV8E6l|*c>{Lcuj2IlA$duZIq2*v`Nx5ERNvX9}9#i+9(pq)c@B) zaek41Xmx(^uT3m4Qia(N3@(0y9AZQ*u>Znoip0a#w^-e6ZG)%2%~0Jm@zVF0vdx zt1^yl)O=lx>J>%5l}d7dWdZ0jAZ51OUgYGNO1Yc$uC;ef(d9erI?NqMH-J*GEJH6% zlarl!zCkjx*(pcgOl^l z_Gd`G%aNc5aoLE+3fxiT4xGM|kdW}ENjy+g+2E*Kh?nX8gz4R_Ls^=JUb7GMPkm6T z5ylboN`Gu?E@4^IzIId5`}XthpndK%1)M?0T-w!?5*tIuiG1R#sLO{sz!h8Z;r;MRU8y{Mxe8$)U#I>Nf$lxe2S1`tKos zKZNtaY{gW$830THU>SvI3uaMR;>r&xfPg>3lxUAvy`Jc4jQ7@V9%ZH$#!Bhhx&pQ6 zNYf&Z%`iKg?;~5A|3*W@R#a32AdTu)yD<%T+}qap$uK@r_jAh$zFeFa>%w_S!SlE? 
zb<;#}F3ZO}B z>U!93SHrr&y^?>$ur9oXc|3T+hrEP|D6DGlk-Xza%RcM`+Q%XnXK1Ze{%_)^AGN@_ zOFV9$Rh}nWy|b|IZu8yR-a%em_kORs-{-MDoGeaE%1O)6sepE9<&1vJ*Lv0K8&9(T4xH^Aka?KCIatoK@&Q`yNaQqsP@mxM`OWW!JucLELRLK zG*n~JQ`8|W{vMB>0{4^W;-Ct&zkrpFlHcf9+bN!v{Zqy|V3@!Ft?MnJ2y{2R>ofqd ziNEPV`w>; zy_L~Wd!DyoI0N5K3>Xm>Fn{6nzkN~q7IPgH&ej}2;;g&1_c0iYEXa--TLULJDVht-B-jpy3PT{9v4!1eOw*0GacgH{F^f_*cOe=qFO+VQiO zn#xk{IPC9cbwnc)tyO5wgEr6qsQIG=O-%=9q-X+Ioux)OAXIyCoFU+Kdpk2|*zFs5 zh8%9W1^^K_`Y8bU9cWGXu@f{@H~@UZT&;X{KHb0(L5e5V7Ri-EXR4F-#?cwa-xOhk zvZRTiFffc(;F+-H7e7E?42Rb7A@zUYjgl~QYwEcbT!-}y*QnY z!;Oqu>^lu+QYbQX0YUR6#iDHWpvQ_k5Uh3-yc8d@UY3q6FR=Ua6HLv`adTGMs&p2A z++=CWg>vL*jFnYF>mH#OCHRMs*X<4B&nOYi7R?V!8s%t{995?Cb!hAYoMIw>JQv?+ zI8cKcvoK5dcLySj5)3Fo>k|57SF8s+EZRkM2|;6~rQaR^PXfTp0MXd66aHs>8wUrc z&k=dB64Y|~E^+v?)`D?81Q3uO3A!)W9Q`WG@Lk_e2wDWl>kB#3s)>=Y#RkW{dAzRQ zEBGK`8f87b_6keP&@&BMkhQJNUB%)`Lu)9qZxnWM?MRTtwOX#0x|X541Y9 zbj%Q5E%%X^28dojbuJ>T^_6Og^{V(3kX#Kt81u#6IxKn4&rb7laFm%9iJz}D->%J! zZcire?^DzZYh`GaS5;N1l*dFjHN^-=2lf%6uWwBSD0O1OcSU3*BxWjXc_Nw&h$*6- zzNzH+RIAR`mXT7bxwg&MHU*I0pO1DnaTYPUv6lOCIuQ%i7InOqCIE7pdT39+86Ixb z`^JWCAL5}C*lxZt)?iH4rd66cr&W2`LTy#GlQF8en!I1r!g*uYnt=YXQ3&$^U&F!T zAM&2Lphpxknu`>2ip>2z%Bb{p6P?lht+N2tjAvcRmddo*st~85gc&~_aXfjurm{Aj zIa?k(ok&(4UNraPH7$O$MV`d_GcB%BpX4#u6pwvdyMvBcpMdN6OUcoB9!J~i{L-Ql zC+p?B?@4l7w^YcRS1Bk`*M?}oOh?=Ky7{>ot$+381?50}Ye4&-QM6P)us_lB2fT>L zgz3l8$DDU8LwYSNdAgUn_8L3^5@w!zzXwzYQu>Cu%5@DSBnX2IxUul?6d4kA%9|Q9 zGNe2;!wVgOvmMd#{VQw;nW3QeOXqEBKAg`bd}Mp~x9Z2Dv;*=)v$T+~xl8{((qyy_oL6J$3uI@t3*J+5>t~Pj$<0ZM$IlN)s z4vr59c>}9)bLBQ1koF~5Rj>9{leM5a@W}kT<#7+eIOB1yV>wuSauG#vw7gboR}JXm zY;5c@waUAkhY`Dc?76}nc$K%N%WW zpF;bH5?mjE%K8`@S(0*tIp?$(RasW`^QYcbvgA9M7@v>?fO-~4QNN3(fpq`$iw#7J zh?(Q#n4X;`p<%Yr?gH}EsnSg>hI@g9DjQ}Eo@gKjQ$hmnQfHpr%KG0%XH^Ej;=tud z`0Ow0Ovn%PrxQf63*LC`X9Y?P9vAn^V|2#eWtS#J1@ZLI>^`Bw_=*E7VSe$o&~q7u z5`}u5Bo2iiL((JU5V82!b^10pO&U7r2G<@XMgHwP?q`gyg#|lFTvQS?j2<0v z3};NT*YCPj4a1(S^BChSeq%XZp?(Ik;ERrbP!b@3ql=>CgJ^3|eTSwT<@3V6YEK70 zi!af6vG!7Rsd7l0=l}J4@1!1Z%KMh2p@sD@th?HTc<1p>;(be^E8xOH5fiUHi6i`P z@Bz$Qt-vh7_O>=A!0hEeJ~zk1#dUtV(Km7|LvQwy9X367cD_A9Oil6EUuNt|ra8QT zPoCHN_+VZ#jeP<=?vBHIrq4J%nNry3@4U_T{y^-yKA7$$YS+W2*efv+w$x2di~di| z0tdV&0I{nit0Y(#bO$g)mDECW6%LKFe&IEX1C8>YhhlzLvl{^UaIfXv7L|S6E~;$= ze#v_^elB@T@TSD~Nqw#sV$9#UyI}9-xtF2+#-_CAx%Y~glw!tF<7;;I6JWnBurr>U zZ=0}OUy%t72Yfe*>*`YR)jbbc4WehD(%U%_wfr096RAl_k#1>G3{d;`+ZQDXw$8v4 z5OL81nb8+zl$G}aPHadPMtMvYK6p$H+VKTi>qlbV=l}?r$WhW%2jidwH zJRpvB3*|qTrwyf~j_}_u|4}zthI^GxcZ?@GJV@RO3oxUff}vP#?p~hMCtdW;0-y`q zISHr0+Q}tBs3d55I>BtjV%M=`dLmoU^=t)d z!AxRjYYRJjIG8JnERe?z8Qk38d%TYQMKci!M8OxV9KPDkz<6pz^x^SpYw_euBjF$} z#t5M?i|LiH@*d;jVg`8~jG8wGktioVYB=;(V$gPvR=|Ei2+I{0O;5MEcJd2qjIS)S zR9WTXMezJf1IWos0j3cNB>)X1|x{fem)Xrt^$T=8p_Jtvoa%3BafPmSJ?yvM#BvCTVF9d@T>xox=HZ5@m8|J+tViMt zqwc7rpe*`JwftTpOR?(H^vlyg?LS5o?_jbYM4fhK;{iV>KCWP1c}Z?&=G`l{9XC~3 z*N(zky!OCagTZ4v-s9{`*idca3cwa%@3H^zmz_4#o-hDQNLW^NGxd;?+;f1wt8HfOo+_~-p((t0XOvW>zPL4ITM!5R3tIc*xlXIecv+Y#qI9wb3t69++c|m| zSWfdPg1W7hRfwxwITS*TlTwlWe%$R~K;xenB}de< zU^#1eW(P2_taVUOba=4lg0X6BhqJ^)NT~QdCBw!iHn&A1{k8S=(O({~E|VupM`dnt z-o(}dW{|AIF|6yp3mm2mL;7jh@MTwII4&)pQ+zIvYJ`Ph$UZ&ua!QWsF1!%qd?mUy zInk4c2546%Mu>7>ULq_C7b?F0WE^{TuaFAVLiQ&U(ZG%XP8kJ-@fZJWffhts6(2t) zDXn-Qx$_uK=bFEJqL~G^iyut0?uj#erUW^olVJ*Dj-<#0q0EAoK=DO`Y$UQj0#MW+ z>%IpB<8aWPsukW%FN@&8G4PYv23GJ}fI%p~g+REdF=(VAIj@Q5F-^PeUt|A5u}8XI z>ifw2*hS}f_J2EO{0zCT8_@C1EgIC*T&J5vm0?@hkmzwXt{lpkESN{B4`vm=HQLog z4Gkd`XET1fh^AmHQ1eM&jlUgG%QXmU>H`} zV{S~y@zdRjq6dYg+y3LBkOfr@nm1O%bMh@Qxx|v9hUI5zjIUDoT(=(NoU{;rpAp&u zs0eY$+&7Fjw6&ZXv~B8Pi}qI=GhV_pt0JDxem)oT`;((}ZjVvF 
z;`(Eq>JkalsEF}qbi-Mf0k%_SgYE* zNuNxVA^R*jzbd~Eyspue#k5el&;uB69*`x^#3ud{>%rlzqohjDlP?9d>&I9tuBqB; z0xywThBQ2@DP=0@Gat!Qew3+1@@DKbrc+s%2(mwfdf5&wWE{`OT4*d-LgPmbmY0&; z#nTyOh$%hIs3WJSEp=Qgt9){;jhFa&xarm6AU|*Onm^=N$ee%fU^pdo+ zv$RQ57HsPV=-{#zw%~LBL9T28^BLN$mp+gjfhKyq*WdP_ zS@&RDqirb%Vc%Cd-<3H!;JidzWF&1k!g0q~LMs`efK4y!`vzTzyk$d>2a|wal5aOr z#pO~rJl94=s_28@!g&PGv_Hb#=E|@Nw~~@$HFU_^7adfNX%9o6);#Y)E$G%`rl#*uO$+{XtPHU2b8yP~K7?Ijzl?_IQ*m}D2TmcXY=j&coY zrswnw{VGHj(EeHpn4$d6WA*u;7oYH$tUwL^_C;i$nX)M8npq=%R%)}68~L$b;_nq zxof)6uoCI(6}X|kLt|Z(!2md87W`*k9mmGR$>~U^do%D;@3w#M4BBga3q&DYc}{2!~2#ysT11=DI&MOPuMruWg$UX?#Y zF{c*KS%jqgz(5v05sT#(YBGB0FR}BTefs4FPe)fbhDM~VSntELB67fi28?nOj*w zUE2NkR^l@p@l}!}9m*j%3(k1aHWX(roL(zzM(ytBSaX-OzEHYvx#T|Y?juC8f{J#n z(nmgtEhMc*u+P=nel{;rXF6UjwZUr|87|r^*ghZ@{!n&W(~U&z_BE%n@veR~or(ls zOTJK1y!3JhJrVhz7GN4K35b*tfsbF(%|vhSX4T6BbDA3M=PV~{YL^5N0uY_&8?W@{ z*`FfLYb_(fUNV(tp-dJTEo=`t)XR(1*_x5dm!>iugOS+=AvWS(c_{0+{Al4l2}}-p zwE?m394n22T8~r*TH(B@YewmXFXWWaK)V`}v@@W99w|18cFG#8ykLQzO(xoz7oX`F zlW*U8qh9sk8q-XYJ;AC{Z=0Sz$OhrEztSp1wX;;Spgs+dCmIXH+r{!ncSLguCL%+f z)v8Fr?ZE||()_}C$=)w4ReN}@9t=f968|S6p{roOvzQ%&2p^3{TL}l0h*A&QYm@H? z?V0+oHHhez=|Wq-tH#}hVOcMBDxY8km0-FBK9qQLawNhVHtaoVh1 z*IxX~Z)NKGpoYQ%l#%lT5jPajb5O7XC^$pEY{$!ox!zn;_Oci)MVzju+SxX3<;*Xn z69n9U%{-VF$|P-wZjX|&q=24=YOE@XhOq`cLQgf5c+csbxtli&kx+~KQm{B10)oPi z`xfmYq+Ua%>ophX=(nFcC~(a)J@z+^G3q2{8rp91vj&4hX33@`SE0N`BQ3d7FYd|G zuLEznl&j&s@PsFN~m9-mw^i0B88QC|-24%BSRh^ZIfZ zf+Et+$KRrd4nln#4RZ2*ZQe{GD#-CUwOo0lyBQZ){{6rytezB^;F{uI0slF8G^K@EeVOmB?%1c0XA1GuAj~cgK_Mn=sq@i|uoX^ZUSuWlYHq9gcSZ(B+*vD1NVKtL(0{vU~ECMq^ zHjQDvG1(%*g#w$M55{JMokth9?C}0)m1Rl#~nZ%ufpJ2L=XWVq$n! zvH!PL|F$9U`n#aMf<1X1jNjvEjQ$d(3I)4L*$KS^1BoKkL(~%AY=NL~#bs@xBh#z4TARpz4HLele zCfa9!a|&@2h(SzwMt=`I_SJHibS2)W0on@jCmaD3IY4Ojehzj8D%U(o+b3Q2+%fgj z*Xkgy94NEI;^NV-NunQPq3~dCgNTe24UUX^2rl5CpkA&#B1S(LZy?2j7TocjRR8J? zLg5-AY`$mx8ZDZvS4I&NOf!XN1@j$>C!#!WNpnQmh;|D50tzyjLJ1y1uch`h&7yyJ zoBT?^=wBfWbOyx)k>2(0g&pgO+3%RwQ7jZW(JBnIf5T3y6H z|7)n9-&w7Lb}1oKUT7K=COfNea6;w$U;yi*mvPrcp}3}5m$mJY%QZI7D8g=pv`iv< zJB!~vn$FJnsz|{xseAo*YriZf^XmK0950K8ue{Bs1j7myMl^Vzd(%}v=D(uosN=oQe0a53p+YA*nfJq*uWuKI#V(j!M zAj}I~pm)JaYULW04(k=s?G>2+xR(E}{Xf_4^eyO|X23(#(dZpOrt%j!*cMgH zVTpxZON#Uu2Nqzi0bDW^UbH2cyO3)GXdh^Q$oO=a_Y8`FoAj*@Q2!M+2&M-v{;O%v zLx);m4_Xa$8lXaku>`}&sOJKwRGlZTs}kcE)Z4?K!#4FE9gyykAF6sx$^FMtO02=+ zK+AqL-yPnCX#h^3aAINo-rYfiuT|us0FR(dJsf54kvHHo^j^p!H}foA58?yq6yYY! 
zB9w2BQq{P*Fk*X9co6$V+jn{ypF%en1t9ZlRuGmHur{LV7?Pyocv#&#GiW2|MBwy} z09v}ooD5kDwgEnRD9I3VutXO;MrgE<`W%Uo>|^@^^m{mFjR9JWe>aFLJa@xoE%>ks zwXyYRgveYSQTk_xdq$!2U*%6hBXW~@h0jB@!YFCke~NR(*<{9lr3T)j@ zp=K6x*9kn?41D?Sa<~#NsB%mjth)GQT*2?P4i1R3sOhdMBn<{w0b) zy$e0Kgk^TKnNo-jg%rI8>7fpE)-DmR-qE)*&+GIkF&AY0v%=tqa}%!Y24@9Q`HgSaxcZSL zBG)&e4ldy)&b>YpD&cCEKLLQB;dy-9X>(dLGdk09(SCU>_Af31Y8V|(L;+9`o}DH9 z{7!JR?tCLJ?3*c!MJ`~qi_;IsWAxFW_2Gvk47wzI*q#!EUm^6TC4zee)32G9nhT=V zqmK}5g5g%7KMD%6JDybB+|3Z?yG!5NI{NEHoZtuT*kT5;XNL#;(v4GaKpR=1lrls( zKHt)Nc<^`+QvZm^p>_7IFDWIw;PB!CIsL2u2{gW5(`tBgpm(<<^k5p{)YP}kN%s_V z*fWqd(Ax+i@s7Nts;#kg0=}DC@@S}##z!D~nZK8 z0cq#NB`hYKAH1PaKDRDYCS=;Zn6l$T4S0>$=L?5jVwa!h`I$QjSO+k=NVklIpD&Wk zZ?jlj^yB$%Pu`H9U;1ZhzftSAm5^UAud-aRjV3zmV{}FxIEFEIjwGKB`*|-?w#vpJ zq%SN=*B_gbMG0OW)Z;Zmi4F%G%!sVkM5TK=^d8{_l&N$-S+ zL(cngr28@^(&fr-nJ*V0rto5fVsIj zn>K`ZxwKbqRR1%gp8aNZv=PHbQVfPz|89k2A!IAmIg_phS+TSLyYd-SNEbGDO~=R0 zGqm!Qhtd=1mL55>+#+2_H2=>VZ_YM0HaIvq*rVxbX*#t(-zgiftgK*9$Y$04W`Tgr z+ltaf(RX_+7;LQ$H0@J46S>k_JZwR~o1z=LxYL<1uC~4fi}?7qwZmTy?R*R_VLKvN zc}8|Yz4IN$^SE60+B=-~pBzy48=x{M79trXY@f7lU#wpHYQkA)&2P5dM{Zx?o=D4o zIk6Uj^=atGa#P?*J2$`i%a8;OMB?!vj{h4c7nixPh-z&Tl55QL%-$8pP`l$-T> zFCO|yQ=0PJ?x(G4lh_T_)u4er-6I+5`hsVd|40H^&1zQmQz@@2@ z9mYr3_d4Uh^O94u+Avi!65=p%NaB8}MuR68YoeK+kk8l;`@X?^#duXI)rw=JbH=db zL4}_(pF2u(cvbwnPa1nCj;pi6FVfz@M9D?Yz*Kq#JL2koXk%}|=W?rv;o5q5RsQY% zg+SLu6UDRYun8+4NAq|iJaf&z+rITmhA}Sm&kOvXBi?aYjGd+HuK8wj%Tan`yV2vq zU8d_kiCMw5Nj{~69^d21B~zNlO=qKcN|zn?es8AiZl%u0WFi*<4%ev%S2PDRgu3#| zQLgerdDWCZr~)h`*s@qpSY!bT*t{`id4ScwZukGLKj;fqTxM`k#Pe11vqLcMo0)X| zFQN@Q5?4)rw~4>~tf*F-J$;1=NN`A&_>{_%Uh98b^MwGt;Ay?%da57%&GGXmg7Jlg zs`-Xe*Jfs+)s;K_#*M&u%=@Vr5L&hN-5&v@2Ob2p-rSE9j;3YI`dy#O$$Q}~TU%m6+|@`Wa)4}}rwA@IC!>LMobcu+P9%Mu3&(6%YRKH%Tvt@HshOsr^o_$n z?PsX{q9y$}#LrwHNZ|D2jh|joo~Ho4G$K62oJoP3g^P+$dqicHi?hL|Y{c)4i~PPk zK`q=?zq=|cF$JBNQjL4l#h!o_MmUsO+NNz6(ME5BO_=IvD5@KKk^=LBQrH^?-OO^% z?K%)qx#&ZV3LsA zdUAJr>up35S2+Ws7vlb}7z79-iw|~%7LroP`+zzB7hn0!yd4*m=lNRSJh z85PIAPj{>TDVBqr#w^%S>S(V}x40~CtLWpOfHXNWv?K8)(5c;dZpHdfQz1;o;ESt{ z_1_`NuI^Dpo_AtHre@#$uTU_;%@=)os*Wy86nb=PkL)RFu(MU|0*o|cPtS$~(tWp~c7yKEd5)($tGp0pGd z>4b|}a{?v!x<_mrrovze%Wq%g_(6SNx|5CNROA%Q%PNJ_Y}%f)x$II(=xx&Yp#yxR zOF5YeC4wW8>~xE>^fRAyCr-)_nO}JhY)b&CNk->}kgW(ghi7$Ahl>gY226{~+2(7) z{Syj-=j)C^=}lPJfrZ&+w!ar&!FC`EphHV>%=5ND??fW;e-as6i(lZg@i%PFD_H~! zpT2QJxMn2vxyi!2nk|m{`dRWMjx_Z5Pu94KAHz~__YVyN>)Y65F>5Ar9}VD}r(pDx z4qLu~2t_i`-JE|ERj~Fp#@(`Tr46<;+$tC~DHofAaZ5=v>;LZ2WQF$Mh~qA}v&F(W zSkLmA$zObBCJKrcm>Pe5{8^$}C4>G>Uyt8^w-gtHLchJVSa88vtaesV^ESg{P2kIK zdm(U7>F%i;Qost99-(nv@u}oJ-AVv_vHZFh53hcn-HgxQVcGh07Z)_+U&u?b?G_O_ zt6zy*J-Y{_$Z_!*uS5i$SMF#GO=eS*I~Otp*@q3ZZ|s^t)RoJPB=((8bm{{h4}05h z$n|^gtD-7=*Byk9ubRN*sr*bd=Ym~TVc~wPNgwZCSER4ex_(-wHDom%U(9$sZFVm4 zTklmB|4mh@2Kk>pnwnNz<2)2>qum`n6wTIr&j$_1C5$Nc54r%Alb(;7}t)|5UkJ7c+> z*43Dl2jU*fcw_B)5alT%dzf#^-OfWL&X{-E85#d-YUx#!5X&1(8x?^$U_!wWmi+oD zrI@a?DXJJRDm}DSplH;>e&Ity;5L`5D)lu7z(U z8jJaKb@+HnlNHE7yHT?L!`oG#P?ydUwp*y#=Xzi<;wCB7Wi-lBvvCW^IrzQmx^i;M z;I;D~!)?er_c}h(ohJ}=eypXL5Pje6<+i=DfBExyj%0-spksafaP}M_3+YAuPCD$R8TOd5F6iMkDR0~A9WQ!Mb2p@aS^hPgRvS9HJ#sm2$zHk*PPp*+pO6( zX-#+a%Atp;Sx*n)dw()nc8&)AwyS{GIJCQWFzEmIuNng-%1pj;b~DZe$yo>@Ow$|J zN7u)1Gx2Lfg5r?50%sq_js!|p$-5KM(rn$1;We*ozys2(e$-8a>L>yzR36; z$Ts_D8@6F1r&LKdJKHkZWw^4>b~@rj9hkHJV)9k6n{)V;+(idj(&TpVVibr|-<)PD zN&wk$#k!y?Jc%rw^l~?8KZ%%*t+%dVC2F-9pLcSqB1<%u`04NPc0FPJjB8lAba5PL zuu7oaxuyP?bGD7?p;X#{$0NYcLt9wA>2%6JL;T(u=<@gTFdoHQp6KH6;Bq@*S`}`! 
z5S6=(4P_4j|vXg->@H6M{B|LA;PA3A@w{4^Nac#|+w zr*|@Tw{aPzIIhF}n~js%e(r7+p)?hsMO5|WB%JSEP*bF26FSb@-?v}f_(28xjjt); z<;GyVMW?J9##BKVftFnZW$Y#y#bI}D>id9M7s5|%_l09uGZl3q^u`vQNO<0+dY*YD z4b&(_q5GTJUNEUkHaWdaAgc4xu9Bp<1RbZa`~JA+q5<5V<;@1i1c*kEm)qV?)0c@W zda?L`FO0O<%xu5-$hKs-sr&3L+b(~9g+aKt7O0SS%XTCnUp>5JGg;bjkkH{O%6}OX z9=~Z+GYE|0yQJM5svya4+Hko?VW<~KXy?)9=rf-=17AN0%(2a(%1Y@6Uhev5uesU@ z$tNxQr#08tv+^jVNEHhAUJ|nSf^J#Y+>r zp4gqn`B;Y09^x}nhu?@U)?4LyP?%0R4O}wYnjp2VTH852-9=Ubj+gQB;z2YA#a$}s z67}}=_59{5j&+}SFfg9Yy>o22@%#cQfI&DgtoAm?WTp86Z6^DDH6!N^4#76#zhSAL zLfND*)n`IlWw}G%1^9TK+i2i)n^65cWAi!+Qr07W;mP(>+~}sxhO`1bx+$KOzm`_t z52O`e%;nA~k%${(!&4-ZUE5MPHx*Z>gD@+rtKr4pdtsY|gl-7W?BGLu%vM}L^F z=-5~O2&DZtKeax`bKNwD1})KU^@@LOY$^u(Dr?f@-g9(+A=W7$O_HQB6uLfL=$}fEaZGMJ zpYwnIF^kqkGK;o6y=p!m?o5n3$J~WYu`%9n3BGIJ39K)a`Q+8sgh)Y-(0y78zA#r) zRhT-1vQHr*x$;x)JnysRaH!NJ;oDN*so+K);KO9I=-7XXu@6TN6)Z{987=hKP>u7pT;~8oLu~cvR77XB9#v1yvnD=qK(+R3 z!TIFm3lwhH;SMf|x5>0sTDAMzou%ek&$A~RB?=WncCZ9a$SM

RXH-P==5FUDmqgeZrLapE>`vHTxVy?FuRIT` zVAj?FX(-y1(_M(vn@LMgPqJgVc;ObmnY(Xi8-R-BoGxDnBi|Wee6B@yqa~CrDW3*@ zW75-;81`8WvtYsLzx=jaH5RCPQ>Nhww!V0!`34pAXQ|xs&t3<3)M0n~N`81~nw>`_ zA0s`PPHas4zQ;I0*VD@D>~kz5SN}+3Cf|$q+bw&3Hk4Vjw!7JY==cR#6Ya4Gzv1XF zzrO_2t%T}AAYDUMa{TU+5YuNC2a-1QtV;X6H3iK~7%4!i-BF|?r{3Zw!FOLFY~RY2 z(EV^>^z!Q3vChgQ&3ehacS~&}eP5A&{#>vO^u`^1d+I}5b8X&mHyP)YnaElO~&KG=}M;~AAb|E&*a;vn)O*lCixb7 zC{j?%;+?$eij1(HpeK2n^{nUB8#3pP%)+zBVfMf|XCm^dyzX*gOPTDUOKQIa`nyX) z`bqg!I$*bT&^IZ!ch3ngcBiUdT9y7tCbqzG2VOvR89z0ssSNR=A0|h>)t!5SH?1d9 z#K*L}D|$L2yj^Q7IL3}D`lzYZ^0uMHK~a&an5w~M&U)atP&umH8^w>JSD48@bwt=Q{7c@ly(wR)}>JyErncs)lWH`HUbUvOBeURE6r`u@sJ zcDDH|4VGwt$s_%;_0TPfVcv&PjQtM_fX8Z?Ezz%W4k51C>gKYWFFtPwwAOhV0OJ5~ zkwR{l&hkPDn$2htA7IaEZ5j-vwZw~(<;hD&eJ~s{ZnF}K3i{)0%f(yU zr%pk~OLfX-JSy9)ugGHWd2-vG^`ZH9SfI@Nw|8U*p#H=qbU+-J2<`P^ClibSEI$V) zMC~tnbO!qUnJd$7Cr65G3eSUM;jOUL+rQ1E3_fwASv-UgBM9*o4Jo6dq`#907{$QR zpJ|b{KU=V^4l(iZ$lfFE!(24HFyJqkPdtlS%({nW?KT{RebH?LhZ48FNU&QVb&$aayv7HLSP~3yOH`;QS_i9gfWF4%k0$q25S`x=j7uTh&4~Oo!7vD}c zjF%sO?a*rttp%5Rd)^vxNAh3NlCiVpcco6uWsP6K_8luT(9)D&+J*E^CkUtRQ4v+1zK<>9z5@0k|k*077iOY*tZgD%LkR5HnYQ; z0Aka0b#n&=H&BGtFiZShFZY#4%Xt%!rFC@wIceWst;@RHbG9E{t?s62c)L#AUpz>Z z)+Z!QUCuubwLK3ufzrZ1Mzx-u8W%YDosWeY`6vmWUOxKdsTUy3D2&pRT)r;3_HvYd zrKHhqtls;WTnV+IXu9}ljZV<)fRAK+jCry6UY?d47x zkxD?6KIz%<&lTH;eW3ua?Y;R~3}dm~5#bu~Y7|u*k++Mdp4-`pJ5<6V#$?4P>7Rtp z`&dAFF%tu~FIq7w7}p*#GHS*5RL#P}q8={l%Pk%)QV>bEv2f(T(MA>c2lLC9iZr_T zAHe6|64-RcFHPG~>3oh_T%+X?aNi#={(sSN${!n6HyK3^)SQi`W)53dt-3@DQ2SNU z?#(sx6Jt-qe%_llx4`{-fmNQg$7l700u40K2mW#VySpGTu5={wWOI>y3LZA%iU}7H zXBSM$fM_HMbEqcoHu#q-?cDLgu#Klz1J(e%sI2ft3(l%*zaOqRRZmA1H5nA`yut?> zo_Q0%-PhCh;y&d)LKlQJuc>Aau9diR+1S7k?Bmfkkj#uI%x_z4cE+W7`bi{~y@jdG z2}-oloCbcEiA|91ClDHb{06tEQIxahakgWD;vguW$_8z;jGDQ&`~&6JT^7emM?WYf z*jUIlGWWw-R^YnZ(dB}V9HRZ7yTv0y;j6h&dTjFs_~J8)9rl0tql?-Bdg zwtP#prHgLgfIL*4LSY&8Rrw&5b3x;dN`6?*Oz!;a1*WrHhIAb@Z=UjSkI8Uiw}Td> zW5rN=K>1Z#vEdQEz zO|NzRWFQ!opMxK;Ew-7MYVY$a8!;jgI>^1<1BW7;l4K)@y{JeEZ{b_m9h-c6v-bpY z3^JV4&ZjlHW^Hjvx8=qwhhCK5)>aFqnb`uSwhn3ygVxKhjJFO*G@jFF0$pZLZD`Pl z0JD9!RZezNb{u=GAZe}Hn=VGrHD)%i;Bs%_-(1l^iMDi-n;WJc)pK|(QeYfikgvgE zkPu2D@VRcX3cMbDO6WS_{hlsDA?D7T0HuU01Jz8jv;9ru3}nz(L&9dC;lC9^HT(J; zD-Ff_M&|ZX>)sFF+}b)@e#1Pg#7*2~79_SAg8xnNoh%?#)joLAA#6H_=Q^1g=iRF| z?NEOY3I{H1ESV$+B_8#e{aa>q<5N+5S}} zVD|D(a(H{-`mK#R(pvb8rll_6=3)VUK?plhSdYS^9-)85I)pU4-RK3E?c-eK5R+$0 z1MLrFQwv!5E)<-4`{QVqTBrdJ?cTo>5MMXolM42SDmd{(^mKaP_B~oQs9Gsn^(`H) zit@O-c@e#j7~h7KO-3v}zetG-D4MZrMS4UlYz+WIV-t9@z6t?HISSk3Ey*0*Xz!z5 zkvEg~m+h}7qK>#!aR4SPb1-ZtbnJxUZcUQ;s(SFSvi0(DAVzj4i{l%q)ym+@$Dkr& zNiWb+|Na2f#cj)hyQyM8jl0R*VgQGS?h=2d9>c3$!Opa&^1CZEcL^gf|KIisn; zzS8UTd@Tm-YtUHZ%3arR-%%&OxHM9fRGuZBxrA#X%`vXehJ*u1AqF&sh*;&z;>Q(hHAQ>eJ{_L6C$5%{)PcAuEy z;OqJzIFPQ%2y+bwv9s`ApqAFaDm&xr6rTGs8~h=aOz=3?u36@z?GG{r7*QKPtr!Ht zW4@b*6Mv5G@AqhmPePsVHrJ;`j++mZo2@Oi0XGX9KMNZ{pj0hB3ybh_xLs;~c>3$` zv=Kw8|3(>qlmOmBU8}p(*Y166AkJ{(r-Az|7_eecMe6R0%V)<3Kh&-6Y^rBHSkAk< zlziRK_P0^rD9CLU2Cz|WZ6>J=vhyY1TqBE|Z*ob4l!@)%oP7xgw_$!k_^50-9Um6b zO~P*u#Jgo9A!FOPCRrY<$D<@Wj1o{k+yW7jgm=zs>Q>$K9I zn}PSMt(t??6>76*U6jVHyuRI7lYwvN?TRf>ob^s{mD6QfRPqt90ut`maDRMs48mFX zk}t4Q`wMw_WWn*93BXx84tv4R%FR@+H00H!baKmwDEg)80<1B9w2h9L!f$r%o~z~a z`=$Rer0VoE(tAk8`J(E)aNiIZ6TT^_%v@d*9)DX3KD%f*X82WFC=h~zetgzOirn1)5$fQaFmsa&?y7EzI$LP)hyxBrSXnvaWD0e3 zRzN;z5SF0pmE&5eUoe}mr=z%v=P7$kT*M><4QBJGVCqWY2d~{7v%njFSTaf>dHO>0 z)eT?p(=sG22G!y7x;d3P7+ooVgAjxo6A7oZI96BobakG*i#thlvhFLj@9^S#gMaq+ zw{dlTw%R~2F?(1kVp*IT?#uVrT8m^Y3gH0o)?@cwE2qnl;yBJu$MXXYAX5J8G1m^q z^<`(kxK>$cKWb!b7}{xP&$}wRi}7~c(hwLwT7~k$zEB{;q@N<%S`-U-{$IiOpYqKQ 
zLxTREDKeh2&|wp)LQJUy%=r1JpL1r@TFBxbD5X_CwSASCDpqK8RdmIEVssq202fshT+h_P`~)EyB@9%IrcxXyIXC(7w4SahCFECM z)!tvnK3hm2XbT3JGTUGMmo?i4`j=x0b5i2xMojoABkbgDZXnBhti=_XbEh5~RtY(Gg&Rw}l? zIQE@METeMIFKcdz{DeL^lIk{8lvGUT<}L}}g%8BWa>0Bd@#>OYoEb;B=(y-R##d}j z=Dipm-#>FS6nrnOZe>SxOI<1AeLqxc>)TX;H>31)dUrIw4-Lgy*o&6+gviw9-hd8; zUYqXjT!#tX@Xye?>w8YM8Bf!7emFnt8XD@PruA{*5G`1n>hg|DNAc?lyc+d9zlH|i zRW{W7JCaPG?85?PR5)wt>0*)(!q;(~A`Amh@P(@UPjCYpe{!BbXLtC)Cfa$upAEmpKVj4U?{k$LPBxlQN{Zsn~cp#L9MU>f~SodUU$u^-t z!zzJ~iF4)>p^=ar%>%+TGRiD}BjJX}=I>%x=6n9ivY5631*+NcHwleaP zp)Ld2x2gL&2*x93eJxheezpOeQiFzZnah|Qf3nf8n6+ThBttY$x23fs2V5h=fzXtUWkRZ?Bsh zVyT|p3HCp3JRrx2gzl)~Sdys=G=`c?Wo!y}PZ){O>fFhk#(eN3NqaLkr zh|IZ1Gl#TBJa;-wHO%$>k?#6_G09?1$meIkEut>IsMT90tGv1(L4mO!rakzsTTwRR z^_7(vrIIt?=^mWGUI$fh;R<{#wuF)zADh-tg_|mf&P~JI--@h$1a&?_i2}i+c5tr*n%*&biHxhw2ub!H4a|_NHjuKOofYIYz-t(x-m01_Y z{tZ4`Om~}VSJxa|DTpMwEK}5tayj9? zjYM*VCZ-TRjZRH3%0x?}jK>I(tGHsUmAWFMeFNw!}Asc9V=IUNba->$!H8EX;G!^s?>@{*1FDS^b zJ8MjiDh{mucyje?)2ax#;>OMtJ@4+*XXQ7&@c6(`nKa z5XP@OImCX8H0<0M+Go)2PNDr)#9x@F_k8_Z37!ps3)J<-m>fYJmY=BI-C6nNyJwA* zc6Cx@luw3akdo}T@kw;LCMOEYtM%ONEV?0#>kpJ&T`_VDT1rfphyLA{kvg7MK3nM` z-3HAb7JN`C)pT`o*`F6hGhqz$%mR8CXc)+;qg-YiR7s9Xv$tuGdTkny5;ef_^b8xXJ&*tkr-n6zg= zEaY@C6%G=rh?Wwq*hzI29c(uFF?%)(EpL9@xeHytgtHCdp{$y~MBAZAxsyy@^0TnTC?llfM4h_4+7tQ1|ISSvs+2Gpm|L zTOLl$;h&FJNh$Y_;$rl$T3EEN&DGhee8D?JGGSs+4Gjbq4F|TaB7qEAc3k1O41tE2 zA7EU~qgOHmVIp>BlxA_>*JD-WLC-ayKn<~AXJlc)h>8y?-O)%&%$82h!9{m+-u}EU z?yFmp0p{FWfEiU2q^p4D9jC(rOavfYOK!Ffg&e?%gb~$Gmz{MB)%$8atPF78B6G}Q zLY~(?99D7eEtU8pgPf4$Ta{<37CCG(DYU6sz>j@P|08COoi#<~cgNDyc|zj6J4Dgi z0p=h3PAYz4x$BER_SelIDL7J@kbl#Mtv}+tm9LwL|L8K>YfbP*xMWlg>2Tp{U zcFK_y8L8j5ceJb=K~$%aoJ?&ggldXYez0JOOMnh0D7;E1AeJ-6@Pg|z879nT%}AD) zHovx%bHN(f?*g;L=GX_d)}h54IWDT`q6ih8V|ob&wCs8eY~y2VpPPtThE@&2@j75 zc=>{3gBG$CYiXX>4A!XCH|cPEsN%!8a~=0#dVOb8vJV3gx{!Q=fEe zOJx)zIB$A8@!Mx>d{k^AdDvt<{DBH9`uy0})t=ya%l4zY>qX5jEJ`6Gm<0cxXvJmw z4&_Xty!}^QecsC_D_wzc)@&(zUqTpjQvm_h1s>zL$i6;Jn!8TC*OW+^i&|n$eEkVd zi)~#4=b~il+C>A%C?;JMX*9)+i1?P*Acq<47*x{dDN_f^70dmz!NxY0^6~ElBkkJ6 zpMCb8h{%|EoAimS_7flJX)$T%Z}{WVS9CT|j&xuDj(#rRfSsZr3=odAtc^e(@}y~!Qs3G9>Rvl_i7cMoUze_5zIFXxc{~m5>i15O9X)%{TO}`fUZAoNY2!QqS+mumx8c$id zEDdbn&%IIq?~(5?g9i4LhOjI?B0;4MThx(ZAY!HeQK$QqTXT~TtNE%pu?EsLv-J7$ zi1W(Gvg~Ick+*w?_0&Pa>Kjw)&cV{$$(9?CN7PlSu|SqsF2H7MK9B}?IW-Mk8Ft(>(N9=hc>qOfdb>Q}cSJetfcyZ2gU%X)(kl2Y zqjM6FE2Ux))3o7reIcG3mm;B~+0?Ig2tg+dzWspD`=v3xo5eVo9NBNdfe(B)K?vfj zLXCvy&kDe-xs3OeDFS+O@j+O+uH-2@k`uRHw_DD&*E=bdp!uGn;SdgZXc4fbCXb8j2t zYxWEhhArW1w0$_G7b;I6fh}n|G$HjD_K_p}=JMK8u?(I>Ssc8nZ?`cUsRR|;YF1hS zn+;k44%g46{YO>dI5tb;ojZ*(nzdWMF&6iKg09&z3oDFQJoe7QH6Aqu3fOQLUcmfH z4{fFEoDeE)lZEY^z||3o2ipFxvith!^Iez5#7v_Ofj5rHzS(j$g!gs#1g7m%*~KeX zJL}sYnDEg7q7;(GiB_E$l!-ZeuHA>ruRd5;ECOtd)I>sYg0{H{m!er};~l#<-xg1g z{Ds*Z{v77mMMos7$25ZBk8!(zB2X)(r7c*e3WH)+0ZFmSzFGyH_zx@b<6<5-fuCE@ z&csfu+xL7yMkFs?Zj_`l50@9OJKR*~e$~m(*c6AX^vFR2t%$uH8dvh;C#W6^`VV zu@kiRz25<)SNj#|%D)yV;FCkKCW3>PdX8!7D$@%$C1RhUbZ@Gqk=?|J#dt!)fW`1@jaxR%Di0U zEIVNo$Uofsy1MdQa0OX4$eI)@@<}pJ4sPFF}%vh>ywW`Js0qMh)Lsx@-vM&z&gvIW!kG% z?RU%sxu5+ZB{gc>WPvC`qe~0N+S9t85QBjfEfhz^Mk}LOe=sQ~Lfz4|)!=qRz+>zG zJhhQBNfLirx=}NHA2yAho|{(OR*NPyIQvNZFwh5Rn81omN)Z0#CNR83^2m;(DQioS7T@4y8=BvR=S(Z65=cR z#Aco{3uMf^Z2N?6j?ci2VIr9|ufN`oXM4y2Mhm(-xAuzgsDOFe3Q zPpd?Ri#jxboj&cG4M%fb&tbAr;cm=fpWKZAnm54tGs>EM$$Mp_slbL^+N6KY$Qho3 zg$q6fg?KBhz%KgZ70r?$S@CVvbkrCDIXCh78g_EP$L)k1z))kX@sjayNHYfAA%1CXHp%m0o{Y1; zXIHe;U#dR?@49)}dkq>dYG$G5lVu(Ke(n`gJs9dQ>4e3zM9sMAc)gffE65 zKnAhwMM!*TaKC{NE>O24T#;_?H`sujr%$E}Ka0Q^Db2+lw|&DU+2^;@$96{{)u{Q{badkJ 
z`7)CNX@|l8#w)wbWYy9HW;fV&xQrieUa(^~5C8mLSers6`D>i=vRF6c zba7aYQF2_6!8Xhsbx{(V3u@sSnQ?&k|v6SDD{KJdo;0`d~)D zi$-ko@!pzNxsiK@UZC1%O!iG{WUaLw8wn^#{A$avO0Rv<8UIgYq{dMc@C|M+jgReB zEL>v0Q+ej+T!USS^Q{^X1ShU)sF*sjQ>Za`3hUS1)lfcr|8tt?e^>xD5f-?%1aZU1 zIoz)M(+Zs>ZN8!Nouf2$V;FXoEjMJQ$T5tcsFxoQ>Q3~Z_RpI=@ug;GAbBHl!h*hD zk8yt7wF}8#nIzy-Q~yB{H3Pib%6eP%M(*bXT%dgc#w?J1IRg?>#czLNpT#sQmBVT8 z5=YZ6^Zu56<-8;&e4i?ptXeI71#5GZigO5lTW+6hp^fGSHYZQiTLgS?(Ffq@Y3YNp z1@4wi&k1Dx?4>!`2;@(cKj@mwEq&wikYN0gR0J&(M=d45dvx0d4gfF+jU>je-*#b| z>p28C+AZ%qviYe@N{7zP z8}khvBl$~&j5zJNp3C2t7VT{;ZsWhvP>{D}B}8JJbCKjl%Z#hrxQnlHGiDd(o~~Ji zwr0kn04jyt{7lu=p8nkBCnxr?jxSej11|0d{Tb7lpm7PsW+{vI8QQLSYOG43V4h$I=!Tl2l98XI zlK={o>H~mdQrR~K~XQ?%}l`60*bCIFT;O92G|P)srkAT)2mKs-dIr&4h&zILH>A| znu#K%lPP~XibaGf#y&M3 zYVzsq%E`$o5(tJcy>|AEUVrT{S4qN62lIAr8wWsWY3ypn_}-`?tG6cl;RAM(aiQ-V8P>3H1zuLbFSqw!_IauVHLo zSygshtzNW1tp#UvCOku4@755RaX~-1lgeFg91HId^S}P%1k=O#pX5Bgl~GcMKXAh4 zUe5N17?a&$#EpoA!TcbHmi1*I$>pUM3`NhajOE}-{MbY+24b`o}&Gjcg}r?5>c84ceTM;?bR_r}0I99L1H+*4JT%>16^} z$zmVjQz2JRKQ`(irGa%U?5rn$&kcKy57J@Q2B^-l(U}K@PvHr(kjXm)LK&?Qs4#IxtHsCys#<6ki z2APHS6pQ?)5p=9`?JnVMWT83Q;YJRXDafK*1_LPVnBnNod0HLxPB6_~RoYklbc3yJ zC8c=9kA^=#T{V74)ApUv6EHHjSRA(`lRqAnjWJkX&XuTEH#`^i+*Z<2$SW*uwCv}` zB6nR3e5~TXVBPYY^?>nJeB@%fgA*s*a>#@RzG+~4+*NzPSfXXM3vuxmXuRM=`GY~7 z9fk%U;ebL&&pB6{{9CFWs-i*s`O4U2-$bEFdb6SDM)uQ&zF!4LqD`?y2k5=EB37qloQ=A-!DS;xF=Acdj8as z152%(#;)E#Yyv^K>ZnToH&@cZTcYm1wH58k*rp>cX@lqZePI~!Z~MsUYZJ3zlZQqd z2hGBHMd#x@DB)akB%PfM9J1ncYTcUZI6pkN6N5zS-8_qqIB}Og=+KldI?an>m49^&|g95oa)+bWzNhggN=)JQK`AQ zVs?~_ zrM=YZYoif*bw40TwvC5Bs8$DQ94AB#QdY~+#eUl3%1o)!gPNQ?SVlUGIaB0Dey23Q znU(IH$K9TlE^||Jb#{TmfOb@afiBbYPq3R0m}X3qz?Ek@n@2FB4N5|nbtXiOc#-D| z566CAlgfhmqrVmD?fXLZazHYiu;k@(Yk;DDYEa9@dSsne$h4w&puu=uyi3FS2Jjo# z>|%{@m`Vf`shFfbE5HI7t7kopGrm4;2agbM$*N41Vw}utV&col_f_cztKAtP6F{}` zqCXt|vzirWiPnC!b6w`br@GlpeY;8na0~(YQ{~PpZ&X%*=gv#OaJaJ+UTMK2X){gO)dYr|8+(UFeXbr~4;^$PlECt^uxvI5 zYs#P}r|!7V2*$cqCWz|+*~MNEu2gq)v#DBz&0Lh9bPj0+BgG$gnf;TUZ}mGl9w8ZR zp(j~j26r{NL@g#AF}E8A^Rf$N=vUdp?wRe=`~HhWxLSN{HBufL)@0CIQ+9sT(+#Cz zKXP-6)P>V1Qw#4*Yd7u7TJ7*b*^OYPE8$1DI1_({v@Do-EFr9#;|^KA7f@D2?PEs( zb|MtB+N=_jN2vTXk}9T=G>VG9oSy3FsGF9>FtPIr}w+JN5|N`zqN-oTZ?VnGYzY(bie@Z&Us48 zN>e}hc)}85xU$^hjx8Cz$NI&Chf*RlDdmkf?H_3`*8?Jay?J(-M4tV!Oi~(ocB3t2 zDgBKEKWCoTrTC*dzg=pQBbJ7OpIMzozuOylEy#aTjqScd{OK9htBMQt`LCw?B;SwG zCkw3)oq*q8VH4(y^UNjWrz=hgH} zCuw&p$%R|zUKY6z7mIq%D71V;wPpWQP@Y!9{RXiFk?0@J*vUfg{QcMG9X!xN5$pJo zwusWH`n=pvPM5tq#P5Ars$7jXBP#QJZD*X{z{EPe9GpjTysvsq&mtFrgt`p=)ihSm z+|xPsIRa_r{%V^z{B>VR(es^w=w3=KdobU{VLsVr(*0FR zsrqM!%z4dDcl?gCeR+{KJ0E)}jWh7>Iu$fiymhY?ssWqGamCRFQG){2Aw(bMUs6`H z?w^W`P^Rg{&5gfGU(HPBUNtXC&MH{*e2H4ChWDtTf48xm0LOren_Ic2Spm@(+c}|q znfE@bofNrY+)s-0=yVZ%eE+<$VYoVI95VoW^HQG+XG2aR6qH#xSPlQvnDw2 zV2~vh`$oaGRFlDXI+l#;^X1$eyN)wQT*7k|=kTH@EEo22ZG4^5dUG#H1^l#&=ojWk zSb$l)AX3{G0T>l8^l1_>Rqw=O=yyT&PHwP#X~;0RohH`b9QHlf2^T@VBwcPalIU=` z*y$>cuSMEv80h9>oW&Qu4hFe|%l{^BIXUX5)5|I=;~C%NL14bG9Z7X0g%>=Td5G@R z)medi5Yp-}z0ol66*2tvN%XVn-OP6Q%k6Ha^H_!RiHlz(*rYA$*&8vcfYy5TDhH!1 zV|3xuTie}z@YU({Y4w=o^ZlV+X;z!J`+8QHIV#D+=G_98qY}!CX2Wrt+IB%%q4%lM z*zta&&pkH4yw<|zws15_z3YI8VZiP@8-vV&5DD)$KixYr;HF_K!p(KQ#5B}N#Og8B zo}i2Fx!p0M6LnJAs&SsspQfq~YTmnV zMBhqM==y%RW6e0XlP<)dORXN3EkXpU8}b`j22<)*nW+Pe>I_|2THB(BRr7t~&1!e+ zmA>lAMZ-KbA&Z+k5oQK}4a7;g)AbggpEnxIsydHPajy*2r>$(pF*4sdFF8csU-|M- z1db033=gZ)Crn(~JQ6eDqzsv5jXU563L9$iPPDE@ zhiO0eujf9MVK9J1ks^Re2_%X`D{Gl#17S;f*JmpNDTEYr7xgQa%UfPui6gILMdu?V z{^~1pHSwf;y|*mz5lg{IsfH=d#gBBFWB*4(Gx(1ys2< zqRQ4gKT%eZM5$1fXeL7-@ExNR(`dNSa_dlKEpVbVT*eYs8~_B<`YUv%v6N6PFE@Km 
zOp_n3oIVS93aE^gtURMQ&^-1=x{xlF*pcr>)YTMbfWPutW9z8o_|eNZdo$#mSzWv~ z)*YG}n40*`8bQD7qp2F~PvgJ1YPUSBg_BnByV;piyBvn%%f-dD%bx?TFj#nedj0;* zMb7hUCvMna@qA>SlafO&5F7ry8`kf-6TRrT1#X*DtATfnjUV4QQ;Hg|URhZ+&*qg` z9W4mBb7KC`X>v~#=8sSUQ)kfUR%)^SA+ z4J#CP5I%E>&O?(cLzPPd8W7nbA0V9D?)H%1aqp422C%=cNE#oM2AFfAs? zD;hzzi^v_-i<+*s)48$L=b>Xz*BYuT5TBi?bL^|H)|d3vmkMek$DWc{MRS?>W>Hsf zOU&}J*bj8MS)OdZMdtIZ=HmCN>L!%ZL7DoO7+>-4fAJnvznV}vtOSv4WhUlO>hCj%)70Emr1aD z>FQ1D5_hzc#`k?_(!GWji;3%7+YMBr-@vnY}`Dr#?u zR#OynBn0Cc%bbKC$Ea)1;>sf|jb&M+Px$1Zq1l$HQ>s%*sDeG($5|z4V(jm{8_45z zT8lU}gV!T#eBVa>MOL=b?^8X#yA5m9p1UzZt*MNsbS~E#J%P&2iL}t@OBwCv#i-+D z)2Wm;Yf}AIcNP~9u`*<^bt^uS_gexW@;_5__DG?lTHY&UqN1ZS(|gb?!JaB;o;l1*vvW2>Ue|0h}0fHjD;Cqs~Reqsg5?G zOl+?60{!` z;TW^c#;#wG2fIgWsWub+5dbrD4!ron$91W%QgoXx6x#fDjEB zJUXJ_`8l<^@2tYH(%yQfEhsDPlm4}5hCp^*-ts|(f>NHlsPr>ZD=SmNN~?@# zTyO7Iy{@|)IX6-cPfTpAa;MKz>IuB)40QU~h5VwC-zoc736KC0s^PNBmBPbg_b3T3 z`B%0pqr#KluenDc*Cc#)Ux^W;^xt&&EN;Z@a|w9_VtQC?NpygBSWdb=lfzdnS2NTW zwgm_amA5xP)XuJo;a}lhpu*Q+!PJ3{!luFAZGU&)!#iO2v^@W6IO}reBKpKm;=~Tp z(zO3lIw{FEck=4EWM8;Ld8bD)I;w}L=@TiJ^f!7 zOIyy={=sYobnXsAp{FT<1alIP&Fs-&0s{a<QSb?z$;fj0 z+gV7_LR|kB_QH(o<-scJ1Edd;S|R+7d9|W~gIQleiPc|1@A0l`l|F|S7RVlI^WFC< zl{<#&&~A%y-PY}y%{4#lsbFJcZ=RkSOo2ZXYa1IIuSyBHL|@y`c&5^llO%J!Sl8W) zL>wLnxqQ1FhtK|ji@PN??ZrSWiSiB(2y0eZ;w~4&($Jg|lSIk^{Z@7V>U!H#X46W% zH)VElcV=*j{-QRu^5O4p0whna#&V$bhQ1YkT+pGa-0cRM_^XpJq-C`!@K+0o3K5kl zy{}?g@U=e<J31XwW89Sd?Dg-YvS7 z(4P80b6cC|*RQrqjdlyQX38Zv0C2@8WhHp49$Ees`o_pv|JkxqpMtr@v4YYV2B^N5 zAQ|75!BKBGc7Fvx?>GQZiA}-5CMvN0voxQ3@!o4c_EGek7}GoW4|mYXyeIvkSWs3^3Q|Q9dTGo@~_`yM+>R0~+0Q0)& z1sYogpH`%}<~512r7cenEV~hmO;ws2;Sn>RzhOc?2L%Vuy5M7$R)Ut)&wNCa5TX1R)j_vl z+!w~&@81o%vd=FpWY%lt#vzjVAJ3G6WxpG(1IQ?30 zyE2r4$Z4V|^tz3p!o7w|B9XvDRZ;g19PwN?{?Q4yS%`{Q-*U=+ji zx>axxK8Gef+_}{%@2kNF-3Qm6tkl?T&Mm1GC5EG*(07C<*Sef85B{(l=ZN4xUiIc8 zxKJi<@x0wrdvV-K*b3BL!^_~Ozh2CJ6PgH9tm(mbW7J*-N@5}>#7 zm0?vBeN4N3E8G7?@4$0Y27$9yy?R9<4ARi|fp-WQ{b$RKgsJ8P0C&iM35*@S|w&C_;vI+4&1b zyA*9rn1`}wEG-S4?*+f+@I&$83l8*!7B3Cj=JAekHZ9+R!a))7KZZYE^!dMIJ>PBU zK%$3-2Z#CUk9iZE*3-1Ctcn%t9K!?7@1~$>wNd}%%i9aK-rGFre1MyNSA)av;(KwI zJs2XgS;F8#&u@yjjrJ?8UiTO6?XW+79gJuzYG2*?99DU!C&znZ?>rIGRqbQF3#5OK zlk(f_D|+x>{QAPr!J$&3Dlt_B?Ey;5&O$djb=cqmPAo(tr+^Rwy#26bOWBE~*DQ3V z^Ae)M1KEisWl<5{4A7D2Qp30JRc?>VG@|C`XKF&aL-_ivVFA>2otCArt(N_LYXra3 zqbAeF>lYGnpJX%85(qmhP+~M$%q$vRUv{uVP z3Q-vQn}SyQ5MwiyfgcPC-i$vIJD}Oj7T$%l(o2)ed|Z!i%qVv;L=^9j+O_SE0$rL5 z&9lZ-x75nP7Bj!(-wyetmzXfx9`JwssGF&(F;h6Ch?6vipk1ZXxm)c8wR>`9D}Xa; zWJeiiX?gj`bu5n+j`Ke!EeCY8+~T}Dnh_^mfZ9X#{{5lL3^?#!za}!swn0}8j@qfz zj9wwm=!!#hPsR057YwSO1bHVMB6_==E9sieQCNa^dUFfDISUjM{?83FZV`zY)@O_@ zYMJW80*%ATLMHYGoiE;0j<+TfYP`p_K`RKwmI@kC_WIEPrjt=p)Q7BoahLGWvzq^R zfsYSnbJ4I_n3V~TfN@)fBy@fG1uVXWdrm)j!@1ryeIuXaemmEDc>BTbWSw%=DZ`VQ z_8^ndW69SUcu-zcrOEXL0dKKJgohx+F0tjsx{wm0G7eQC?ERd2bR9_LaJ;KQ%0VZ7DP9-G^MX!3Hm)>D=v~ z(hR9WJFg4dk;BQ#z@%%Kg- z)eKbJiX7$tFK8m5uB@#5d@K0|nwqp_xRuZ3#u-tJmw$tsT3NCWO@fT4Nz2GAwSi8P z*-WfkDUraFt6gO5fBgH0bLBi&&)%G-I3O-1gL+T!%eOS0`GEOPe3Yy;m51}iKaM81rY7hqX41G1W`_F~_}O(! 
zd75k{TcSSnaO~D$-%#NZeScT+VX4}LCpORofAf{=^h=z2)>l>zgmF#9l4{CSEzm|l9FRI)_P7+ zKM?$Ia0R7Ru`K4yeOiZC9JLTQB+17l9+UqN^KWR*O+{1ly@|^yEahP09B5HGBc^CJ4w#wk8e}>R_L)iH;cN8yp3q0hzoS5%9hCBd`w~2yntW7LzXxOodh-*H+ zi;5gp6lUUbI=8nqxsnZnj>j&6C$IGE0zGlx@f39y4KI_O)?F-5^1nn8IF-k!WlU<& z9dw@V9L<*d$AyQCM6+T7w51#lV^JkXV6erBRH=H^cPX&&)KaZInSi6-`D%Y9&W&ch zL!Co!xg*0U0}w`HVY_xIa%%ynt+^hK=nFb~x`u&X(hyqAx6>TtWtGtX8Ryc+ADYab z{Utr!Us!lUql=DW36`K|_&kO)WErM6s{&RZaIUWXr z*CN5@!S;Ev-P4Dt)6KVA)$=poKh74N@Om%Q`$p$@gK1qMGuh#6cJI-+@xRPN^Hn5i zv*AUzr7C1tCn8`{00Yn`Q_vZK>0e zmg*f7pzvWfz~tEF0SvKV8NpkVXMIWfY+B1E4?85#SOk8d2!VwU{sYo_Ci>lui`KRE z`IWtKdFRtte?Wz21 zbciP$y}e~QWinX zwj4JR{1G5{09EL?h!~20;9gDo^!F%;Rz<@2gcr(bNO$RlYy*Hc(hOaCU#M zR6Tc|1TV*xzXuUDz&yM>$ktHMo47X4zgL|i4Ug>lYl{UteBmsA!Ka?8a@Cx&QdbE) z(l>MM1Na%o=p@)8FJ4G+UUgA*FFdUV&hHs=6HGsmthTv|eG-NT#3OzAo81Gk0l>sB z#aA~283_E5p0VO%BfB7|Y@hdDj%Z?aWX7L*M6TJx>oRsqD(s@)wNlJNmVE8+&kGXq zO=&)zter235V9P>ww&$>M^W>D$Z#|S-evXY%XL~g2j6km;O59{OPj{vZRanGG}%AZoY8|||5@EqtXI<|HeL@iE<8}<^bY(#KKC#DQ<&)u z#Ri=$hq8_DnP3tS@H%evHd%e7;%hkJY4SSBS*yaIyRwAt4r{QT%GwYD5|F2|CE^jE znODhf2&vfTi|l+n?k#RLgP9Y1MkRvubkK)U(1u4%&!SuMk;00!8v-u73|v`KZ~hQ?z`A3teS63= zPM`=*T}b_G*qZD7{MywF-L#9tXcS?Cb1U3FaRKJNgDTH^a=G%Qu{FV=Hrk8n#7!;! zZ;@qb4#XIUH<8P=QH~*YdR&!8_Giyvn1u)}ruPb+>SX8zFym^aUW$E@yN~xhUx&6- zEE*-Zv59qfve21Uf(E)pMyD75Iql?r3)MdhhTnCqq3YMBy*KS=#bp7XP3}Qh#-uN`@l37uUb1rYx1IS~(7DU}@%Hw~~z| zL!QRyzV_Q&t2KTIN?g84;ZCd0^_?hGd!b?N;P5%pbony!jW-ylk4D;=fv!S*)Rdz0 z{B&km*hh@`Slg$q+X+t1;a7D>gEXwhSf)D^xf}^nt%9s)tFczNnS@!)=;E+@Vy%qr z_(}ZIethAe4G>fT^lj4rPFDlb1{M1h-M4jMcnSy4hu(sm&1W1GwC*ed49ZSPbr6L&z{3g*%uz~FV}=2glfh!vLn6EYKXW*m%rO5Ae3d) z7QSO?!dp-8+r{_t@<-dV?r7y0f8a>7Pi~$T;O+D7FuV1idbhorj!n)!1mzdv9n2qw z*sr##_kw2MBV=_rvR#-fO4k%lPFf?Zm2Qs07YJM(>|YNQ+L{}C*QaB&%d0T1FsZa4^+C@^> znJ3r11zxR+yCdu8gf@>!gI!Go9s!o0^?PIS7c~jNZu4)VBjZ_CU4k%rKKdPXEnvl8 z2nEkz`*1tc?(GX4v?ro)vJjfXyF}sR<)DqS8f_IUNoJGggoJ}q{5ceWhE7uceI&_T zS;*d>sUQRtn|JtOy@cCZl`VtqM{hQ4tcKu=DXtqb@`T{sD4+S*ayOI_YQ>?1O&8Vu z*-r-Vj>G$h(Wr3cH^wU1VgU+A(W5i!x^46JduMKm8*!cv^ahlX>;|6KdpR*t9Q~EN z{2$VB(o(%xHjX;71O4AW$dfwVb-|653=L658=aq<&CcRVm?!$;4?2b+AaP1L5YY; zVCdjrFeN0KRRPUzYz{<|^L}~D#8grEaFO+8$D={rW}jxweUhpw1_7u*&&yIWD<27M zLqUQ^1@_RWIPl+067*OQB2-Kf0`s=$T@Pq}CmlDVSN&#dZO>_94{!i0>qeFs<)3E< z1Km@%iQ&SFfFlAz< z!XUzOs8--bX#$-QkR-Kcj%`li9g2!Al2^I_t)N}8yr9xxN$YFo_4}0^*vCF>0lLFv z4uQ*_R^za|8CjVZq#K*=bsqG1@gFIR?&A>V@HtVg2hDsl1vl;~6>b?(7c9;##xUPq z9fyjzoy6#M1gg=}O^Y%Kyhj(LB`unr@BbP91Aj|>rJlDg27%*OWXtWJ1ZN~`t)mj` znj)a>kX)C0muJa3fQ@twn5%27pnxmb1_SSW7))3KHv&Yk)nM1^n`|MULI}QTZ zXG_P{yDvWUm$1ycONvSsHsUHQ9^O`Jb z1WmkJX{0)9b>@+A;A~f^t&XQ_FkP0s+-_a{^HjRPdG1I^%U_X};9*?o6D~74JF{d~ zywUkJ5hZZuAQtSFu&71nd=ZJEkE!Q%aVklzQ_o6mfHqn05_i$;64m?l`83ylRWXHfy9*Oz6vRZ~@)v84XW^?R5?MyrrT zg#iX3{5>>=R$K6i!iBPC-lt)$wrE;ggjbz6Iwoi0e0wGu zmoM>e|8p%v?yy9KdtAQkQA+$e)Z6@v6@<F&tbANvE^vlt!px*N01_sfvHYPROmHf8zQdVSeziB#VXH8S065IOus;p zB{Mrl7plPV>`|fi5!tc++vMIs(59;wGlyM5fyB_2okCSA9bsK>YkNp>e+p74*2{KV z&n!vprFE!FOtnst%|HEb%hDw#N5i zWDw*Gn)>KyOM4fKEIMZ;=scfW?l^@5SSOqBa3aV2eoxgY2>qIyqK-BZWtT?!|5u_@ z0XRGc9)_7RgV!;|(_YXfy+a)VHf0XaSgCMbumI_Rjh8Pu{Y}yo2HIKoeOrJ2h@a6h z6HNJv93fuQ{@Lk*vZ&dsXKaEBxEP}Q7nd)MQoZ+Q^OErO#qtkq;9+Td1F$ktk}S4O zr`mf&tIFk8>?|7iwEicL#*Huq_I83qb1cUw4q_fp@$k_!@nGLc;*a9e z!i4fKll__6?)s~NH(Z&0c_8VL+qBz}dc3eMzsNbK>1^3y_om|^QJxVLX=beV8iG%A z$Y}}~gw_qt3l|xQ0WTQ~H_us@c=r7IAMhqLu@rYD(_cFuFXfL# zM2V6qndg79eR3ay;6SKDA<$!-Gll_2GNm0w!}91Gy|XZFqq!E#JUBEs3oWa#T_&zE zJ}uhu&Az0)TFZ7iw=@3PdwjqCCnf1s;Ty%G=bbqa<=z@6riuy<=w7~8!1wOQ`<9v- zJ6g-N2OTqEs?i0oGmF3e{raQ`oS(1=4KMl8fr8JFo3$&^qE3wC*|Zs5%p8+=0l^<0 
zhmHQB=iBitfxMI?+4zO+mVhcY57I(ki7Wt0ll%vSsB&?qRm?o^>ca$EW2QwWG5)nf zd-ln=j)%2mwa#N0KPUjH=4;PVe|{H-SMg-#bDfu!MLD{wlL~&MYw#FF&=`oNEW_QF zQ6*|jL3$mQWiYiIpPW{V7cqpWjJMjA`&l)Y4p94zN8Cz4mQO^8I%W&*pcKP;S}jnR zLUNvu56up?AXB5#fbHp$$>NHmvuSmn5!=6v?Ka3PO(l?aQpt6;DK^=8FL^6~4D^;8 zuH~LQ=$F+GLQB5AAV2cjJiq8Z1>hp0A8h!dM3vwd&;_6Y9Q`=+I{CK9fD)I?r(U-oRa)&M3cR zpu@oJD8ac~X&FU(gZ#?nXN!JF_J;SEGy*)9&YZZdw;R?!v^AtE8$OEJe9<2^pkbTC zA`sRSUnY@4K$u1b9@SJPCGkXkv~l5n;UM*d4U$f9&KocBj^Ufer@}V@5-*MjD-0rK z(9TtW(4NQ$+deOuif1Fmxs6*v41RRppv;6yppEe`T!N}RQu|Tq8<+bK~UX5`D%4Y-#Vwmx{j`DvwdF^=hF_)*J}hr zrC}KdGVJc<4u=M>U1uq811E!U3IK5C)->Nyf9j9EV0vSS8ob*iqx4pd08P*E{AUrK zT9U-j=diAwAiZAJm>;(^Xt^(psWxcm4{n0*eq7#8Nei~l$5bsV&(+I+{y4+2NW+p9cUS+^sTDCh@(rc_rms&im-!kl2l`)OC(6U~8%BxAunUv?8}< z#f}2NhsBdsj$0}iztlsX)A5^3bEx7z{#)nO6J!Uf)^T5T?o8vJ916`FY4!9Na1A-_ zv~lrSzKd*HovADqaypASkIZ1yoNONV#m{I-Qs+)-N9LFvt5k!4c5c)dc!c7bQ}N~- zwOETa@I6(e6CCfAV;)l^2w9y{1P$+4L7p5HoHwJP&bB9YMI*Xnvj)SqOUpfrfd%vt zksbBxs-JOGI#$p)_MK5q2%;}8-FSBColZ}M+HEY{xF{2?P$WC=!L5z1wK#0_DXJt- ze=Z=-1p&r>Q^r%1p`BA3j^iFpm1q_pStrv|=_N-6miJMI{DXZ`yjs)VoXPl} zdE^07bE<#DQj>f|%RTF900L25VowPFXO>gGR9XGDtAt^GREYaC=4q>!nbYt+lMX1I z>1mL)tLd1{v))&-i9gCsG_&I^-_Awiv2WJM*V^}%#>c`tMYSwJ(y?hCw+AQGFt?dL zI&@7FaWs*Zr#sd^V|QqDg*x1S?^B6$3G@M3rySlpzl2A0E5x(uWWl6PU!F3Fqorm3 zC+BOh>eS~;ZE8T)BK!ll`pDq93t4ZO^nP&bmtU4TIXcS|eqr%>cL4E#fD+I}U^sZ1 zRP-U}RIks$rVk%6Vd|0ylyN;(j2>C8IXTRrbHgyeNEUNB;8#<$eJ4v-%JzR!{l(Ud zc;Q3a#;BYS)@EJi@IH?Rx4#Y`TG37708}SgrP(AvL;u(`gJKEs2Fr6&2RxZYQ=l*D zYRm{t`WqK&nokdAj1GH83Ix)n9Qlx%;$ThS+HcTUym7`>%h29Z!jt&q({3F5T|ott zNMuD46|N@wyK0mt8W{Fj^-lKvFo^x3E4n0Dm;O#dl#C$1_QKF2 zH>?CUla7bQF4P> zswsJi2`xKx>LeqIDMbWQHJh?J-4j7dTO~{bbuO^P8a!*K!QGeY(vi!A-|6>x7Pg+gqGgqs;RQI-+CmG(jdZZ3?%JBV0W*~1W zOvhCGaao58ZM&oC=?||w8X*YUTJ@wXm-9%%MWR)i{#39&+MS7nU!zYn;hJ~LpNB21 zO9yLYnCqTSi$@p#B3Dw#h^fQx4!)P(TEB^3Q&Tbw7cL-$M`*9*p1LP!nlXCUQ%%(Y za#LWy#n|d<{oooJq|nJX6-1(_=2;r5&onX!M~7mmSHr82{_^;4#D3D#HlTcdGl^WY z4Ro~TGu>StUouhfWAtF?Jx`&t)whA{f{Lt%hG9RYI;a}LWwI!<#f2`x>0res0uJSm zRbc(Xul2ePH>F?$5qB}GIKnFCy63$E#wg_bhs0Nm#8)?@jeW5FM@Ulc zPHO3PYgY1?XPp=?9VrG5pI+W^1mpLY3*!B(fQ-ZDnTHp7EmUs(oamB?wrb39&2zgZ znx0*e*@OXL^p>MCh!t2>>nf-E!!olN#tV*D+OE6f$b)-lLG~;x>Y=JS+#YpodH5qB zrQi^~Ro&tmm-CY~P(Mwr+w@4p5;^FEwJVq*S)bYx1nuoKK zFDv2E{u=t)p}~0?0{!G7&%09UbxyK6-iENlQfmp?rN6?dNU9vP zDF&PMB0obiKg318;lz9V`4BXGY8{J9LQpNDXJ@`SQ6YIcE9oGp417KgW`4O&iaL9{ z_p9_%MV!OhPizeVr=sZBpIO>~-s;2a6z;u)XX@}KVlIM?FOaQ-&D>T81x~>4cOE5i zN1I9ahP%A3$mcIUhlw1iS|EmS)+hyP9V+s9IK*)<@%GRjxP1T-|P@&$#at~E0 zRIRsLA2lFnWsfiK3g+8U_Zwpev{;HAB|L1mHS392`~3klp@dzVeqh{C->VM)_KCEK zz2vkB#ieia$uMc~PPl@DG=1#La{qw5ERJAySC{(f?Nz4npUzdGFwD{L*>hG_nUQ5W z-b%lF5cl~5$TW4yQ2vYTASGtb z^BAh{KFulMtCKp@5%BH~K@J3LBp8Ja@KU|JqT{HO5P%f%PATt0Wl*ATr)6|9;QaWZ zeZBj_p5x&d?KNgnUnZ9P`HVrQK%6Cjgtm6N^s*bEB*9h?f^-icc9x5a@?2^gn4%@N2iysO$Ue{>SoSEAboHE z`~?H{oPoO$d{pS&uCt8H=T5%DdnMNhJkE;3p03bEym5feg6Osw|Ti93;)gP}e z6#tptSxYqChq)@cXnBdUHB(i${?p<@IMGE*ch+R3!Qs*sQ~XGZ(qB*iz|KZ8gnr3C z$JBoFFz<{3?L4b1oFepeR$WP%A(m*C;6Sl^*C8eu55%M!&D49J_LCk*i{q&B>BFrR z1mSDodR-njp>jNP=fT_5W(sY6j0Qpr5wdIQs#SrNOjEw2z zj?1~6G?s9O%J%v^1-_U*8QCyU1HkmYNKW4VCWo_xeWW@}*S9bC znM^7V=r+DCa$oMz+RIg59yXUv0Hme8<<{$_ip3T?1up<#G;=aGWtS_L3xUZY8wf!t z5cFirK3bxGg#B|5>>2M)Y_@47rf0Pp%2mUDAjBD10k<4_&QBnL@nl8*<*$lcU%|p8 z>9|J01g;V!4dEiaa#%zE3vsuklfkRHMU;o;_#`h}4%c&iC$9R0>mFC^N4)lYUEK;wA0dW{sR6vqytJWRX z1YK+j706*N-V=HkK}B=twjWLXQEEJamayoD1|=CB5YLz=2uWE#8xG%?r0$&@E#b7y z$mLST#hSI-v{zpj1;YS-P#wUHkNTj_&yRez=+PtVj=ZuL8RNEYCJMKuxB1_AT^*(( zk~~i|4Ylt@rPGwC5CK-siwYLS0evB6nu4b!{T4EMlR0N-NB$0Wx_118i!HQIT7*IM 
z4J{)}JKNJvU~PBJA9oDj7R=<#f^}tli$n~r9NBK2Xo0ucGP1wmD?>|MXNXztPmYFT zh1-?64b|779ZF7{X+}lR|6~oBQysng4)(-jIqHYGgjI^`VwPAKvyP=_i zG)mpBLi^CNA$L|wNmZ9N4E8m9Jhdkk=CAfO@^V(xpO}tbYb-CHPg*RNZ<&#jPKCx+ zWab#=##DK1PENWD4Vv{+`G%@%%YL5Y`VyC=FM2fX=4W*fRc&1ky9+vJe)c#W73v=? z3vxC~^^o64@SoIZU%t{MG%;hTbXElj8k;{Ob41UQMe!?e%{8#9$`N(`sMB{y_5Lz8 zy16~Ew^Wl?zNw4(Q5$1imYqs*~a&g<h;>u-|&{=*W%@;{;HyK6&xduE*1xUYk>E6lYL&8$*vjTwWD2!hwozl!F>3~^ar z^FTkl6;%7UJIK;FFxmu1?+c@Imv^Ql4zN4bGx%=9HO>$%lMf$Z;HfWt-GYtMtsYOA z4Xdadh#)x*WAf^%^a{Z9Qw|zIY)-?N=X9I@$Dbj+GpmP{~6^hPMGFfluFk%62wEhA=l~-&@>X|U)BpY-BdBA@YP!| zXgIoeQw(`4-h?E7cE33VzkAvp)x5rKzVcK;cI4@PUU9#$M+aE_6Gv}|F29|UM|pKx zgS6CPv!g6jX1sXtJ>T*PM^zkKX$pAisN+2=zF#;u8WgMeHQPvysnaO_gJmjlZiW`- zti{~*rYtMWX6;iqo&-(OZeT!MLVDRZwVn(VHJ^{$UX#*XUD!y6&$B(VTEi;4yHrUT z6bPmMs8-@7vp?0lO{S&_XtTnaS>?1R!V6RKgrXbLgkM7TTji`{=-D>oEyha|oV3g` z%azf-xC_Fur?iAUyl?FDU?I5s>DYPHJiXP1k&7Opi(0m(H9p^z*3ljCefk@rHH zMNjQ8)OqbI7+PAJ7N+XUE!Bk&Lxe#6?!ism&Bya#N5({Dl7LxarJ<}-{+mn%nh-y-58F5Gw*qPzIO zAyF6F+{P>0Z}3Ec^oUL3x#yJpirC(Ckb%~ZP@BPX9ycJ1Di4F(aiHpfMRi0*{Z}lp zF>-;wUZ=O$??4BKI=S#p#Y&6a$HmV*1S zp1jvcaLgLq>}}1nmDXu2G_M-Z)~Nior>&YrSL(o-ha7;z;N~w*e_DJ!xZj@AVA{$SPnzOH~vE7 z3Yz3WM|$p!N^U#D!Rw{*<8>fme2wtvFQQnU>fYSrGqoSdpXO{dDUGva#;4B9C7^$* z?hAfu%}GPe1(g6GQ{RTp*$m`(g;k2zMQ55z;}B?wcdlzppts5@qFc>cf|duIp#Js2 zub!WE2I&XY`V-42Jk)~!tWgu;YGe@ja53yts(+b1%YL}&rDZhU7bP*@K+Z|4VAV+F zBmZo^D@CKtJSxiO=a9`|PMYFtcv{$rn`m&FCix{y;Iv@DukAW{s&BXmm%#A6yS9|v z&@YW0pj?uXb|3p(kaP*T=9=V(PeE}-0C^=RJeQm_#@hx!yko3|5!K53T`@;X9&V5hhBzA=4A*eFEaKmKMDBbh zF5ew}7`iYnRo}f|ss6rgor0Y8mFTOnZ(ri^vF@?Bcq=PkUhNL#BO_-#KD8KEWr%&& zyg%;QG5`s8r&{PSrf&^mQ~(1e3_#+@Mg_rH`u36;>%*aUok>CQdVOAB<4E2!aHMO^RG`kSzTmDx+Yr^>}o@rye*G zasfTlh4G-Q>n{8OF$u`AA$R_`LLBvU!jB50!68@c|M8#yIr3`T`I#tLu)s(goN}(u zHe`vc)ufWDeK6;>v0@?~Mt!!oZVkfPYjW0__w{|ePx%hu-nb{zTQ}4f4BBAM1ogee z3=B-2&w0ORR>HXq_(nuU%xul9QV0ztCrNF#y*Tr?CPUm9k zw&mnz|EYNO)@NtrCiuGhV8<8@O|g*8DZRz-`g||5)@%1PA-$zOJQA;BZUyc)+jh6) zjoCtw(mCtvd2$f{)n>KZu2SS;bgGj-$Se)+U|rw&+umkRAr`M@K;--=?-c!|1d-s$ z&Zry?qR*`NEsdl&%*&_{SR7A^VqY@uIemwdy z2x%WH4HEc?_#E#Lo> zrnX(0_Hv5lcN#&n-!$#La}s;vS{{E{=D2kB!IkAhS}T)2eh-Thg%Y;D&(hZ^TcOHJ z_7%PeV|C}5-)A8a8Rl)YBxwoNgw0nsXLVdngeF3F{5!i7Un|j?mVG|~*sWj9oaKz{ zCT0kgMExwTyhBORTk885ga_w){F9>f7}dl+XBMDJ-*tCzNs@+$Y!zfxz8GO4lX-LS zbG|(GMgOt&oO&7s-)c+gW!56?R95rJ*;n7wFztl2n zJc8`DsJDe>-;iJ{a?kamXQ^1u&*D-n#H8rHQW+4rtj6*;Bn&*d&brYO@sFmRGK~w~ z^c5HYmK|2^=WCn~XJKS6-r^n-t7Z&N>E4?Yozr5gd$8aRy^Xj*Y>kS$wZw~e6dXbV z;(piLosXd}HW;AqBwe{@v{9MR??|6{uJvlbu<){@CR?teDVK~$Iy?^ibhGxRB2hH*V{hC+CWoT?@~}(c57x`KkQAnqLh2Yg7-=#qa)`MqG zgMx;(3ysLn!9t+mL!HlV6JH6K{j$5oxi!AOE})d8)K>m?RR42cQ8-0pcUWTZVx~yf zWu+o?`ybUMXpOxkaq7YgtW_y5#uFk%!7JeYY_p`H+{fWXbw+A>BXM+-v^(y9J$i`X|HowdUn!iDvhf*HDmuV;h|88NDSsW zy;xuRKH+p^m%;c=xm6hN-jeS{R$sfZFiBs+LFPh}d*}VCA}1tk`lw!V$87jP%A)Tx z4$$UJ)DulRQxa**ZycbIO&xM~n4KdWZp{IDirT*JGAA`vY%-p`b5cAmWNHCcEmhG%$sKk<_$<1i`h z=k!GgEcjt^ENod2WhADcIEr>+X<+D^9%8-ZeZ+mE%fQix^regPD>EXY-Ku6@ zMRO&MeX+lHY1x9*@yvDt6S59AWPLT2$4i%E_&Xs$z@3dbDw$pGvBAUO<-Ch&424q1Kz@EJfUZL!PBRp zW#mN*bCILsHWnKEO|*`K0ZGh(8?@J^K6jtkfbVLyNl4IJJTAGs-wN@0f01#gY<>2p zy)`5;D!@xQ=SZq7b8gz_!A|J+O8-HZsD4byncia4jAV-j$JQ^7(6=cj6ZX3o_$m0fjn35%_zSxBdp=A zuUoHOSG(Z~E;7H8iV}0!XwFJWqy`MES!$7L*$tmM1hNRZZRFj_+BxUg`9jV^n#lt5KT0?qk2{;%ALYm4qOd z@noqVt(52P9!_e_N{Hq&ObwU*1$!ue(VbQsCVIqoBQ<|H*YydmT=4{+CCuY7)zEim zmd^hxifd5yW9+Y1@yi%!!5`|J3$x#uh6xJjsxwfocAC(WUXfF!P%g2Z(nGxyXw8eZ}87?}-Cp3o)hlb8q=}Ral z5L`rSqpm`l(x0gi<+_6Lz0MzKMf4iOIuNfA zIGWo`G6yf#_+5&w!SawSHVs_Jj*Klm?Mt+ZesCk^<#5e*50pWMg#i2oT8yx^XaN<4 zpO$anqz_w~Q>j3bWC-1mM_LSA3Ehzq1{+n?SKW&wJnA+X={8BY-U&wM1V4gm0uUYx 
zEKZcER{~Ed&ps;9#@%fiW=69yBVlSw`|$Iawn$U`tT-Oj#qfFvg)yL_44TO$<}67{!&e#q9I8sLNRcYQ)?G*N-aOu z=07H(_6ShKaAXnQ_P$L}S^DWF(en`Kb~-d|w&XkXr!jW{T+N+%v}QV$t3~=G1wZC) zY(1y>;MTYN#`Ym_W@{qqXDe>14o~&QHlw2grq&<2FX>)(pY^#wVoP4LFshWR(oPLp ztUga_F@HPpy;A)3Dlt9&_IK7yi#ge*3rD9V+NAH<$Vu~Q%d3OdP2=45-cEF)RMwBk zz|Zw4o^#mpU?%ZVc30lKXEW1F!WuQIogRJFG0#{%bb0<}$Yixm@5IN+U#;HjN1LP) zU{!tVwZA7s0h4{QZ{0Vz&~mo3^v>rX=c)iEmlSQX4d>oWApHL8MZ^nj`307kz3=d| zy8)GVv_S8PQQbz*^qv;?Vp~#NRfEfx_^%#$!g}}PHu#%zuQJMZuZNpE&gR^I`JhHr zV`GwTj}9LE_~B25xJ?*7ntz$WC+A1jxN(=4oOQ&;WCPfO={TYm8X&y&*5D9Bt~lqX;0E8$guO`cTFtg}hW9+&@Vfr!tJUS}hSaCaNXO-EL>>G_b%LO)Z zMho*$QP4C3zs^=e^`X9q0whZxLDU4Cq5*AKw>WJs2Jz(#UOXuEXF_$4docTCX0&=aK;}So!MEB&;OL*(8Wu3-1uYF*Sp{KqnHh#7c`!1tXJVLnb>V$ zZG;4T{!vmuMFrmx8h9Bv>-%)y`I9X!*W@!iDQB8TmetJsHI6IJIb?)c-vkS_%QvIx zqVG7{WnLc{jhb_DdPW+2Xo<<$vJF1SY1aPw!@?gZhi*2zw|YvwW9LUo zfHBL#3~lk#n{%<7U-1|Ew4Zq1C}+${I&Z|JS(?!tX>VpTac59>xT-a&Bh@FwS?PCH zO9uq&a}1H6PPL>=dwmAK0O0|1bK?4H*m!DeoD09_EOYj*f{(wj5#5`#lwfL}wA-js z+2;fuj-z3JUyX7w|KK#RNKrk!9cX3vKAdl$Ep zvH*CGBl#m~dpTXA+!7ESs1lHh15=|ca`Mcgx1k3<3VhYUOz8&x0RR*hh8;nIHI#rG zogEhN3#^LL0cHUWL#Ueo9M=!YXXW)S=F2L%{1%{INI8O&Ufxzm1!@n$$VIIJYY(V^ zRMAO+Y=|g51})$SF-4bqEJMIcxd{%$^+cDGkvb%)Aq7S$?ZET?f2P0d(9;+^+O2%0 zlMZdcH^9Qd2!wP&SV#LH?vz?7pkh!n0^Zoji?N9th}D({D@Xx=09Cp#e!5G+IDwet zB9TKP5VT~_GYd?DA`kgVLnB+b|8VXWxrcKi_se+Qdp-1Ckdj}2-F@=e^dy~9{y^%^ zOsb<+h^ZnyOrn!Sh9C~70fic>`UzdWfG_`V$8p>SWo*yN+hz&c*v{W>&FE_>5+MSV z(R$^&<&%`r3YEpBj(<1k`1B2Z%KCPP?i$%kLn_D5AE24mVacQ>#2XvBh2n{;)&r)- z21;`F%q%02^^Y{1eiq_4jlUPf(_DDGcW3*wQ{9=mzV|<89?|-6n$&-Zmn@5zhnG{` zEbEjtR@(O9(fYd4cg}vxm($ZRQM^Z2OwPRj@T3S);xkgL`dy=E1xZiP^G8x*W^4Ku z0{omcDxR4?!+v$+uH8~5D@+pwG$E@HOxfZlwV9lu@rBqabZt{JwP_f1N|SY&g482N zp>1J(t)Z^d&t7RmJKvYy?LLcf!A(lyf>(VKBQC2C^&9NlG1N9{67X85H^WYggqvdv z@oeVsih?24kE8>I{I5^V+S{85VGa3F^?4LYUPH&S>PxRsFPlZ@>_2x^icrJ!rEoZZ z-F@?GCFxWRI{sIO`A|xnh8BE2u#-7TB)nhbkaZj0_SnMP_HIeoU?FHw`TJDx`LH?& zJ9qUpwY9W#)~DgBY+@dCpi$uKk5N(Mt53#rKJ)%6`NIcD|%TX#AyRsi6yCft4=gH76W4D2Ly{xm6jW95wv=HeUGGAndQrR+cgJ83e z9K|O`r$+4-(j17@pQiZ*76GL|h;u~=fL#p#?S>ju5sY2XEiykcTAn=4cLk`HspD`( zfGp_wS=6c!1pgM?3!=-~t?C2tzpTAtCNc&WqJ%Y)Gz09*tjp55f&e|58wNU6+^WON z1w(`}0Ku^cRc8+@#fOqxG%u>u3NGW$V+4o%aX$f}%CsN@2x=YF9YKqDtcjuz)gKu5 zI7|7z9(oQiU#5}4v_k2G+K4F~UK>xD!^}Q2wQPGyP8&`iI@TGPN@?!#JZ7c`6hLq- zB-YS)E`T&uz_i6PO~UUjQYI=C17CpBNHszvDv?GjN*$y5boJzl z8n2`_w&TdX*@QaC)aj`0b z#t#!Kl1m4OV{q@MmFC!B&e6ri_)w$w2)SHV+Dr(MF zIujj3KVk$dUOLOak--CM&^RIIG9Uoa1+h7s)&ENMdq zinW|{vH#zw7pM`qhZw^sz&uDFkkLju>Xoez=nU`M4wHGzr2;PxrSC5pCGU=CV;Bla zkt?+r^_3B^F|+Xk4G zGOZN}A3&ZY3;Hm`MV^?*g677_Bn#=>;0GK{%&*;bk4i#P12mi_t=G%={G=;I-BQjn zS}Q^pv#WjN>s8HkDg<6=*elK6b>*0n&R+6djk4P8T-<2yhQse%Vt)(`#Fo6(v4=%d zrbQQUk%e6uKAW~5fSdmka{7HEO-f7}%Hv*#>N$>I5uvu@8hn6&dXy}qJrWWUJmee5Tm0#pmrG4aX@>Q_$i=^MC3(w^d1#%X?X4OcG5moy}8Y?>$INlY%$JZAK%YW68*$`&S$0Uo!IJ%@ZVxTXHM#tQ_-*k$mSE1}uC3NL7XOuP zG%9576077FJoM@0OnHgv$Ev4`Km*1EV*41j0maNE<0ZPRWN2y0Ke;9y#UrFYV-t$(@K!3yM95c8m^Emj2Z~WD4B?P2VY+HX9+`NgP3) z@lhA1%01mr>D_Fh1Jms!R-uz3(FRfO95?~WfmmT*W^7=P zDef~Yw}*|2;^A@9E!A%uDnn0ziTbwLq*O=f+yJ?-0ucQYct%p%`O*3GgIM$qJT={_>C{kIY)_BNuVK-lWo>jUs07~@R5ps1ijd+>jFDTCrcjUE$i>$!x^uz|Jh?=uM+qynjhv=hiw|g)MMNL^1 zHw!>3Q>gyzI)M*lfW8Vom|%?u&bORCqMAUzA!b^Xqqn9#qnd?CL3jw1<+)?7kBD*v z_p**D7c$7kz3iZ80bx5jMOA)Ybr|%TT=rT*odwzlZ1+ciVU!n8S)_aFwECMDy-7tj zm|h@lgpqNjJDMt5lJr=@FUfeA zZZoY|YM0p~KanUztLEtddV%~q9}bd3JYQu73VovBRY&B@m5wUmC0}HTeHZ0FEoqN6 zif_s7yft4(a~t6?YUZz7C;s{N;8kcw%~1s2lu4=K&_%jPF?=Sg8eh@QF{@0&g@3hR z?O`Xg^`e|7SY;~Nn%8lH89;=cj$Ek_nVeLay;r0ck(snT5r{I*>7pub?WQ$ox^%)T z$?r*XjfJ(=IbCoW4gPP6>6fh=B-c6{gi-u8ngq$M>)02L3TzLMO1rugm7%O@>^^^) 
zHSM4L0vmN5cme$?!;?$4jLON)Bghm#6KmHD02UW=?=0vkMu`D8upL^igqp+0z4o#YNhJdKqICv;(CF zbVH=Hk;n{}3K!{W(KpACQ)E68f#%x-V6{E6*&)c-b#bsX(r4d0`I(cG^D?mGXs-=| zWNdZv%FD}l3(NmClQGREp~^Wym|n?J%s|C)A!)&&Qgp|qrj<3DMN(JT+sdpGjzo5} znjYt!824C6Gtqm69yl8K0#FKv7e+}zdd}7>Bvx)V5rH4CUVQUR$S})l-pIcwxQn~JZ(UIB8 zTz32{?kwX*xjc%0&h=5IWEOz!NC-fG_01S&CuBS8@c!>r@3Fv7B}g&~7epOVdCV09 zKyCoY7*De&e-V=)<#@=&OP`L7vE&5GFV=Uoy25n47|B#Wj}vPBwcf&O3<5Jj&@5RT$XxOp z#1rEy$`cEzL)#$sVz7&haF$~yrOZ|4=C#WI_!U&aZ11N*j`lJG*E>@bePsQwmunVj zqMEBzjV|bxbB?FYWP(jGWPj!&XQ5#7)$&<(Cw9D4Rdw2}jU$tdB;}&y1*O;JME}@0 z;o{_cijn@e0Hc9b*<@`z!lLS;E zgYfl=2ZslmDW83^&|~k48Ilscb>?ELu1z-Gbxdh^`AaB`D%)HsyN z38Omh+m>IzJ`b)vjc8?VwBjREx>dZIDP;`78!pSftFWe4h^4bmabG^1xlPyRw)f2r zO?LJzSi5V--k?^m^w0h1zGWofa{!odE{skrn0{wrW=+0R=$BnR-L5u|sfE|O_xsq= zZR@RxiWs;pgjY-Pnb;N#HJHNNDLb~-K1LL2*wC{4g){#uRH>Q6bXPwCG6SvquRNm^ zy%=ASVh|u8t6yaU7#5`DXx~^q{!n#|@&tf-YwQaUaw3$Upqe3 zQiagAN!+wsn{Zd#_)rB)O1Ss$j=`$bjCZkGW-G3=y;RZ(f*Ne9oQ(weQx+rDzBmh+ zXE7NkJFabSGih3HklNw6J{-L~2sdeB;IHRW>Sz$(?H6rMD-b&${1{nMWs3W8Bz~Jq zmwrv_$K=3U6^yei{s^9IiE`4p0atSo|LxhyxDQQEj2JAM;SpvUuNNY4&Q?(@D3o%( z-I-2Ye6NyAlK=P_$Jf5W@%{*0GUlkK|I&ZRCHP*%IWD%pKg%5|X443i< zE}7uRCs4MlV-$6}KbPiQe;;VO22FKHfnI+1DVNGIzdDHJ2$PvTS~IV@l~P46_Fq%< zGwK!N#*_5qV%{`T9!*0Ii%5Gkt`(hjzfyUX;Lnk*>ce>{C0#k|%b8oW<*HK7CEnp@ zeMXieCN$a`#=`~AeEjd#?TZ6%3)S{S77}6yn7q0Bg}3vq};|_}y?npo#%iG6Vz*GF&?sUs?tLR|~OGtPdEM zGUB-;qjWF7ln;GFJf?sA62$;PMgWxjHTlO7as&|p_*p{MReCT+I#Bd*CI(_f*5jZ{ zkS>I8F1{N@FX}alk%jT(b^&w>faD>j#pceY@5nESU!q&d5akMLq97-^nxJ#*@4cb6 z&||dqM}-_)U8gGm$ktXBDwp8VN(d_fRuo!yva3!?tXznI6wnQT5W4gvn)7ebv68+29EyEidi1A2 zI!YykbD94yKC7`yjoAuzA&~9m&o`{M0UC^)-QC?SW5e1AkTEyP6&3Oixkm>Hj9@`! za`noSl&vlCktIJ_3x>K)A+PN(!8GZgDC`|Pdxs?>s#nt_LTlYo@iKU~i`Z1|g(Z2a zlT~{b@6h3Ji^)0u@b!(!I-z$z`ir&6mZn**$W}LgiHq;GnQ$?7WSWa;}A5*$J=tOTFXK>W6l7 zOI^heX7?_?dG*(_UW}c)YH;M7LD)WtHE+CZA(habST|Kv;G{bFDLnXlROc1!qh$u! 
z{O$epnA~=o&ENe6chUdfsiuR{-M3;A*zc}0aU0rn9wJX~PG zJtdX9gYg$NWfv)oHNmh0XV@-Ax2X6`R*VBWN;%s%-m5k|cy*<~XQQPLT@L1?N6Z7^ z`H9dctFdU~F$UKfrf1>bLXdL{Uowte(VcQ0f@*+GyhA9p1r=|7n+|#))*$#AKts^x zGLF_dScZT)p|2305b%6;dhx9|FEu&17iuYHsMZauSSr{K2NNMLA8m7qa)CN%CxjtV zR1pOR;OAoJqSYa?O`JN&<8d!0*L~(N%Ms z9)FwVb=EWE`4h5O?`pL*?k!2>p*CeGOlybz0rF(|Nev2Sk)5UhPGtC2>i!0@z9XS6 zLY>YXDWM`lji+WbUo$u~H0jW&S1#DWCX4wq)S8LQ=KlI_Q2kFG?%v!PxrEbUc-KkD z2W~$+c!;?FrUs0Zwj93niXUUP`Xcv2+Nk|F!N*}JxGF9o3q*hSy8mOy!}cT(@AlHBd=bWvLJNJwFP- zD+vYBLwVrC`c~aJ#&G|j(a^fP-SpX`yv8#MiLR1-?g+K(jrsGM-HlU2 ziofVeqU6!z2XTK>ppz~rpzO#fhFnC|z}MYJxwfUFNa0n>*Q1<8w%_NqRD3p#`-lgh zo;GdGfv{+rd|Mgy+b|1-c`F=xz79Sl)NL+{TynjHDBBJBnV z(TT|qH`$TfKkavie;B(+d#3nS)f8-osfw;3Ee1-**O?!~a?RDkZ+{_%f4RND@cZY6 zzQzLK7mR62G9B_?-sC9mO!*)tG<*p3VGYk9qnNn(w)yWp5kUgsN4aic*)rshx6SjH-k1z> zvA1R_9jP;h@nrWGEt3INOiwd#1J*jbs=d>0jsm95j029YjLB22XL3Rej;303bOxaN zGmZ!hZ2gd0Pw&+|M|7d2tgP3Lj^;-~tgPkD0Xs$)5)BU*Qg>ihH9yCxRUo-Y(7BI4 zv|vcNi89}oK_f?(Rfg(d~wBhG-fP5!2D+hJI!h9trm@Eku+RmSOuo7^Epaj+Z_1m>FSh(w zKfez$)ZN>833;w`-krxNJY^JHI=^ulZ{3qJF}Br(UxIr%I=cLxD^1AaBUGH|uT8{l zBv5XL54W%&L#Dto#S$vtM;gSN<)G!BS>+{^MfQ+ABs%S|KQsUW+!uCiZNf{+5T1Re!sQpnT)65m@C{q$fyBOdMqzeNQm{ znxgOURe{OO;Bup@>mxP*vKb`J$?ZOKH3WfHU)~e=n_UGCM(HlBpK&Z@);B!CXo+xr zz$dM`58Rsb$#FZg-rduw!o)G^!sGkg2mNbBJ_vHxnrBiy#x*kpIRY#;lPaQ=v~&DNLyzA(D>nBQg!{9?rL!NV$p5EPXD%l(tK zY>e~GtwM`QiRWZnbO;uxX4&WZ{={cC(_H-8fV!mXeeR6u8y=8#rbV9E z16N(@V9yhbl!P54fiScs`*{Q*qcQB%vw_G%5RJ{}L9*S)QZ8 zvgJzWfp*UsRh|OovpaY0m`-W&?Vx3Az_5L?=taYwzbUL51i^X;xOe>BRV03UdR1}% zWyW&G{pg3YO)Rx!U1LT;9NWw0fIp{;2XEdc0=b0#!y-rFKEJZf={%_=I~p_I4@Gwb zE+;Sw?@zA?+ww2nyS6RC3L<2(ybpF~7b0G;*VmwB1;hlwU_PuESTGS`_CIj12w9h8 z$;=9`GaTRX>5>2sTUHf*;*G^49u*Fy$a!>ZOqJ=pRs+^;O4MjspE-!S` z_KidG^L7lQy6Z)3#`Ekp^X+tgp=6HwWh}TA<%;~Z2_-j~xq|LS?UA*^FY8m2o!zEHMSR>5Iq{y1A@;v*r|;twEC@sFY$ZlVXGQ=K zC%uV#!PA7Wzd&V`&uuMC)c#5fE#g%oaiKNoKsnFUZ@wVvgTs)y_5L4s;p*9fE)Ic% z!WpF#4swAnZ#yF{FFyWOAtb z%D5;3X(nBDfSs)d1H6pAdmBP}rxVL$_YbCbDwkfH%?~v|L z+u!AYSQ_>YYB8QDl`;?EM+8JmMwOt-znHEkkG6FmEPnBGRQ~6e!}31m=kCZCWV0m$ zrIs!gURGWPN-@h2$*M-YQYlidG2swbA#Jv%YK$rf6$Ip8zdqZ_G0gtaoRku2=}%V-OrwlmABECz4(u;B zyD$E!UJUT4PB*SR{eRIHV+z(fNJQ9i4tK8cV77=hcl?2pgohI`+&*N%H)BeA9(Hln(D3^x;hL02hC=ac^V%$ePbK5q8ca zNv7Ki*x!ix7)UfYJ4R+qf5I_NQy$c~-EDO)X#lvbje{D$H2Ubv>wNgIHg}dGJt}!I zkIx^|y~+=$V-yX|2(wuz*T<2R0)sBJ=7M#a>H1THAw4H#)Z{aci95A(6r7h?B^N#$ z-TOg4`n^HiZdb{>q1LXVy7T3eAB;TD2#Is90#4E~ieBU0<1OnBlYHlBuJ<{m0jP=5 zN3Qqf2N)UOON36Cv-iD{H)+<%O87Us3S$0bLUoOCaVKSQAVYRc7p7@)=)ov-5ztQur zgF{z(1lJeptYsJGmj_eTX1(@!-g{!@kgKVOlh|7?67`kIygxN^2&a^TYa}=v2=oy8 z?ZP5*ey-N2KpUBSFwqzb-hXc%wQ=;fx0N=>+-<{wB^~C(R%$1~dZhe(i^I&tzbdM> zj!fDmB5A@@LV{D*C$u_=#ftxFj0A=0poim%wx6XT4Go$0J(IUWJPyjoq|k2>oRRJR zd7Po*2kJ(ojGslG0Xpi=?Q`?lBKMWxnZ3`639s-4ldxuRj7-~== zC#JpcLEA};LcvU%-$}wQQsAc2b|m}!{G1w(RZvh+M8tq4VQYW+_wS$W0X_i%0YzGV zd$Wy5nIH$QrlzJSYEdnEf(mnZ#~&HFCxk$l?B|S(fV1sMq_$so6ek}}%79=8VLnF6 zfNuF#wZ-=nSH=5D)KEQaN;s$q@pOImI_{;M>_C>Rn!=)nZ2i-Qwtk|zdQbS+xZerj8GHT+R6lg>E)tB0YEb!HCKln z^d%Q$Zl`%nKK?CW9wVs5=k4cIU8|{eOu6p+tIFSEVj}j2v3wldv87tMdPZ|Ky&M5t zDY`7o9Fssw55HJtiY@EsT!ZvtK+Za-c-`9mQV&m0W+w3$yPvx}Pm}`gA*yU>PHQr0 z@8nu(AG<;kdTb9yE>e_k#B#5A^Aw=0k#NtLVN}F-Tv=y%DlKfpA`d{; zA}e=i{%mG`Vry--7Zo)!&}q}1m#*cS3iyV=kGZk#DPma)BJdJfeUsYXJC2-5ufe4Z z`RmC4e&gfJn!38W#zr+&RpO6w$h#ik-Me?}%0oj#NX@4*9rl$!f9B`suP+XAva+%g z6Lo4|w0iHMRa975SR0!(0bOSm5~PhS z(L~Y$iIE6A*>C@&t8`|yNsF&e$}6H403ybJdvK8A)M9jH?)EY5;lb>H_0_z1gYP!J z{t58ZIbi74;l$B&F+z>4u)LD@LO~Je;3lw@v z5gPYLFIsfU^bWJ`Qs5I{06`gdYFe`ci{UCCXx z7keG;MvRw`7bBCh>RSYapvHN3tm#B5A)-XE+uc9ZxK+i 
zsHmuN(b&KM2QTmF&!4qVK(pV!KcT0m=jI;%`Sa)4m=-Kws}u(d3xuQt%r7i(adEMM zuD5!AyG6Oa-+WHU@;;SPcx$wKCZdD3X@8TfQ#Zl}em9x+y-b`q;L@uh;(9*WygIoO zo1?e=ay+>cx?^^JMAa+pA7_J9sQn+ql6@_}PJ$px*1#P4P*Fk25|O&Mv+z@l!|J{| zq7${qm}$XLNz_vJ`f6)jLn#XRMvDGBQWp8J^!Rnuqid3;S+es13+ z!0qiVxIf`Q^rwpXhiNC4ZtLPn2;GdO)qrHlA1Vh?>B?V~Y0>Z5)Rl+hDVC5b@#G&n zDxTqy*(LTw%R+27=gvD2*14FPpgu&($UPy&fhB_ElI+)`T_Jb#Y0&x42b@zv0<2zu zBo2%Uu2hrQ<;{A|4s}{4UUvPXPL;xtLwx5A+Jmk|2mObXj2<_zhP>GzBqT&`0+&(4 zCXx;$|4K$i#>gmLmBquyCr8BnjrzX_$-fu(+Af4Y0g|neM7Zc^wG#C#RFvn){ca4V zlL|CwsWDGrQ3I@OZ1jjpNJ#c(>NkGXTH$ysf40)uVvC(n~$l#!&M(2R*+DqQ+y415U_*%BrJY>G+lce12y z*LeG;%wh6lV`GO(k`(qIwT6W2OP8W1+A@xgx^BOMTW5{{NSI*+;@oC?HPvlgJ$Z1X zDL~R_M*;K5kX;)X-{S|!@kAW1g1f3qd1%R4d6Fw(w7#w1O&hk z6iNB9yOMi1^)%|CkP~AOg$0}=d|vy{)F<9MUfKA<(Xnx|r7}Q+x`_m2_=Ns3(tsa7I|vSWZ=s3# zN^YVfCsykE<-SLbUry)he%44*lK>ULUU{4Jx9S!m{qcc_)9j0PuY>#v1M@L_oW3u1 z`164+ue&6)FI)<2l4-@N$I_zZZINvus+Fb912DSnyW0eF_!j&`_D%6OQ5n+t4qXnH z14W)pEs?JdwTH83CSE3U8#z=~9f_4f{GT#^pmG6H2)H3X874yEC5SA?hAY2_dayV@ za^?~9d-;#8!l$2Tqyzh(EM1Nl4ReWi<0i7-&-h3b1g8XBM18tjqS?DOJx~g1VRcd) z;Gsm3J}Z&f0$`vL9NAt;FC1Bw|+a0tDww8h1aswgfFdNycE$TRj|c-(#T z=n+!qG$SP?{Dn<> z)oSVKggZ&;;}IF@w49R2dbuCR=#6Dh#f7U44lY*+4qjjvsV#|LI`oU3FL0Ib&{sQk!mslhe6}Pl-2A= z^NXP|Q?h8Gm0e+rsi>n)BlJ%=XiB!ioJMU7&n=bS%!31>>4b7aN^BT9_MfyrSvk(O z9?)|dfirG)GCA892DtJ>5%U380iKn>F(~1Z#?I`@AqC@ z7yo7$_A~e1Yp;8)b$61neV7l3?n*pq^WMH(onZAcsn(xBel7Ud%cbNl;OU~#sxCTC zQ-zm=$Fz4eXatRz0jsU`_{deNq~KfDI(JOMhTOr@T~Mn*z}ZIOSXOmsD%rR)7W4p3 zI_57c-#XW?4){!opE�R9lQ~DLK0nCHuyjaa@HR%i_-ouwDk=eZPrMIxZ)>j&Aim z^dK^drV-oWL-yJUb+n#0>r<$WXA4G?{>q)flWP><1awzUSZI&aOmUS=tIo=JN<;rzWFtdVakxP3>+Z`hZI9bXgA-Yr}5F zLIRU64{Oc|`^x!F!O$~MaAAF7tfS8Zq0ulN<&azaX{1mjKnv0i07fuPCuKGW3OK&2 zcO5PYKcC%gVKXnh;$5a)mRL5}OOp}{lvz6nRerH7kwLQj>{#r1`3u*AT@hY%S~`R-Lg@e*5X6>{*r&{4jYRLwGM1y0-qtht zXDVDA5ZdyKWde&aE;#12Dc%FdvPaKpDX1G90;>=M5Sh53cx)qiIPx+Ij`~HU6%www zsY6$sG!Q-{CAg)R^BZn2hSrREkCyLiAyy>loD3a=5<_=uILq{0HmCU&y8#;C1Gonk zdH;9#7dCN`ISVE?AI?3(z9XuSowP%X*C1W}nY;2QOquG^XY` z=z?YDlCw*i6HG)W!%xdWztRt14-eI+9Xuy)@>(}>I`0E@&}cNRF}Pq>1KG_WCk@$oC#~J(wV9T!Abjec-FcR6I&7qkP~H) zE9bUIe|7ysBH$D=ZAJSQGws5{Z?3-Xud-0G0>#x#Vd8h%f+x#qCyCjW#OLGju1S(Z zGxiiPoeArSWSfh%G1gc60;T>YA@uIyPjqyH4+q#PTdLYX)Qj_}X*&J?fD0C(q0K^tdBlm29-g8oTBFFaS7d+)$hX z>8bbD2@6{!8z@^iA8Az1@rw@ zpVhz}ov0$%~3Mf+6G2yAxeSqPkiOQGfEvf|FbN zjYF)vMif=xQF6-|pKlXFh@KhK#i!nd{Xnw2RAca5Pn8=oV`JhYh1*N+baH?gLa>%3Psx5rQCEXmVLm&XmtE-q$fWyKPc zm;aN@koj76Z)b#z5)+{S49~2&TShjzK4h%i*_Ehj z!_B~(8^)FaY0ar04fD!{EWh#>I#;x(nbIv=%uyhK^#)Y+^ZsR5*h{0i8#2PMCM%zt z-|v~Q7u}0OJx#iZ4K3%R?e4I(e@o0h?)-^DSUKuU)W3FU`VewI#0r| zz18QB8dMu?TzS(BK4%fK$r7(>R^N2EED4twg<4=2B|JW?I>cz&BzycB46L8`JEFgL zy3QWM0F3F@&)ZoX8s#WSTc(`tEJ}J{SKT#3(++%&Uu^!njbNPMapJwnSiG@|P#q53 z)gh;L>Xdl1`@LhS1`kYL=z(LLXw!7*n{$G$x^C;D%}4Dw5%90#NU1;~h#$Zc(rkuj zeNBLa#9XCb;>^~0@&Pw{@$y{RRDuuy=FmFq?bERue=M*0G={Dt6|c+p57>j%ooRG%NWh*%r(}Zp-B)g7(~nk zjy;V8ph7=U1BL3njK+C7%^O2UNwyd zF$gM*y*iEkL}I%4@K;BiH?auEbFcJId$sUQtF`xm=KGK9bN1RohLTp9I_7N3EYS_0 zfUWvV>idfpUZEBmjc%TF|Z979vqZ-75jHJcIYtywh6~s z#yx({zCgNysRzc<*CuEhBNYSHBlPL?%S3zngO?HVC0pO{juAzMND9!edH}QI6Qhy? zt&mwj4jcf}qw4W>XHpg{+aDpnGx@~IP5>NZ9OEJi4BCukSzvt|r`a*}Z%M^TCn7{2 zUAMYzT5q~fU-I+c)N*Hn3=IvH4kHs1RBsyV>!-cDKhevk{G04)MP^l{6mQUD)ApcZ z@u7DufKyW?U=0+`napzJ2aMcYrk&kAp>UmT+ECu@JYYY5Vuhw;Ne(4mrHg!37^)Fo z8hNmvQIyczN_j8! 
z%^{g+^YN;S_3gLlY?de9E1!VVq19Gh%N@6KhpzD#pG^~QrP|CimE!X$yG;h!=9-ER zE`~+S=7L^4T-{*3l^xxryn9^0t~H$VKGAy!81$BZ>M6{2Nw?fSq4$f_I0)dycV(?x+MzGF#ya6Y@HBltRH;4Q${JvNrW^LP_|xZl1NleVB{u-il7 zV{tcfB~i?D_<5p*Co5E`CeDH+d1eLO7drf-sU3S081pT z1+AriE>K4%F_sN`5ogg2VNRi3c>lBBeoe{%lb6-4TV@lh`%BiB3IK=ldH<@0vHk{D zLtw{vUqY1vl0ttx1U_ zSibFrip2)M^Lf2@8h2M@9R0r9Ejml4?ZBF_R&a{nb&9_=615-Segzin60r2CE=7qI z+n}NG1QH5==f*VvTHsRf5R6WYjvZL`oeZC}Q1`g7#Vl~%T`<(PzFhpzfrY%xMul|@ zVYC79T{A(JsHm=KE|A{?khi+_I-3Rnm~KjbOFER$)R19(r?kTzBvu9QMPpLsi zVr$?UdhExvI1r^fEfK>1ER5p8R#Aml-y1Cf%+)gpIL)B%vPU<{2 zeLm};5zi{m-wr`+4{1(Jr9Ty4*=U~kf0MSCPj-2{evH$ZGdbi2qOA?wJ%!xXx76Ez z{!L&_5V6ym4|oBz^g@@o*+r z8@>^y`uWhVs+Bb7alN6QX17<>=W(S?0@uXyHS*r1&25zh;pcsa*8KZnZcO#I-{bVl zR~&Y8_~4`5%q$^?$%#DU7SsEUnwDFaB?rA%2&pag3F!Oc?L`9hAJ#f6HWmC~4+Ay( zPyG*tfkr#+Xw(rXI?Hp_P7VVP8XY$UNgeO@Kmh?`JoNLA9-ncBahr`54@STuKWnyDkR_eyiO z(#W%D8=Q0c&b^sBV6dO9C6tkUk`9)xu}d%U&ifmd-XqhNs{j^B1G$J) zDsGWp*=s5CTwSFG5y)&2tEUIWXd2yLnm1#mQVt@9#~~2B(8Q-lowoHOf9uX)-~B^y z#3?BwoB*!<$9KWVr-hozb*EdaAAIBCT+3D&+_n_cU$#CS{1qf|E%n#phj%tkYFbH? z54Jud25F(xLT&)4ki&;{Q<(`dL`?|cJcEnw>nH&zKwE;i@$Fg8v-PVL2wnX^&M zrjrf8OcBHg4?Bnt#0;&S_*!pC)GrT&!F=J-O(Pc6wQgRT+1KWJ06VD~mLp(q*`9jj zh(ipn27Q_1D7*j)d~e`T_yC+zFVE?YRt(Dz@&tMXm&%~XKq6lL2DwmSEX>pq#B^By ze1afo9$=dQGzC)l@bLi2)PfB3?|&zJa{>ndDnRPpTO}OvK!f@$h((?D-xIO^l^A5N z?+@yk1Mt?tw8Xi}3!ze*hpn+>1Uf42Wz4^4?IF7Ds>YOH%kzo%M04-avGW^USeI%}PcuN;o<0LMi>!>? zE6tL#I|Vw3(QXuR$*x)p&1<(I!gEr!{sE3-e*uqpPLoLRKTT}DN$}oeexq7S7 zQS8XYN%iqY@Ugz|22b~=0{Sed&@jN&Ad)VZ*wLtpz>7=E(65Dz< ztAM&me(dnVg-)`_#p`6x8&eGcXW|2ckw3bwoBC5;l2L18W8s7x6*qQSoJrmq*`!u6 zbSYKHPrC0~EL1lcvkv5mULtm_-JUQc)wH&Yo9~=|GnxNAWE_Pm#AQSS(^X|*6**R2 zHvt%ir9w3yfX^uIU>mRq+;{hqLzPIyLVdlaPDUw*MexEKG;uyUqXfORXww;RqM3WI zFi$~Q6fOoD?Wc3na#Gep5%)cUHSmy~!PxZ#*at|9Nicr;wqTv-v&O;zR}?jiaT@5% zQQ1R9A-y~TLcOD+1djRHr*Cx`%ZaY5ccaH98IjvzDsb98LQz1B5TJTyiY%lq0aQOU z6K3+1u@uZq)iELD^5JlN_*3{sx{ZOl=?saORZs6@1F+0|jJb?}g`Vn8b<9*K+Ym!A zJxmqg*Q=F%1s(Zej<~d6j>JmA`7&shvG75fzO>va?|#EROma8;2RR|=Qg{MB6I0r) z{wANl5%uxq1?J&N$L|#(pj_XOwy~0-e6bhz+L>I`OW|o#1kAZbx~~&!9#pU#7d+R` zp?&+fp>FWRd2wfTWcYM8l(%#++~~Y1bfz*lN+qG1$GNmGKylo^ExY(yo&|k`SrtC? zw$KXP!HF>FQQUk1p3M=YM}zbLhr@)c;qXzR)5?V zj*R2o)C5KbP5vF(iWRtiHz4p_pdq9SSH@8by>35dN3uXJfs&*2CZLp9X#Sm%xsD~< zB$cg;T{JPmbs~AfU-agE8Z~6LT5MBpM)r^uERm?LPp_<4>I1YeJ#*b`pcJ{$d<^`% zDBW8=Yp@I}EzNak!j4_^Rvda|elmExDBGuN-O{T}rfy{{?mqaq@5czN6SK%bQbPl-Nk+jBmYcY+9Wg=Z zsfx8D1gHm50GQ=7bQgTf=$|2ja2w}2vqX&sEOMH=E%BTi_*bf0^Oo0$2&8=^lZCBInqRymRJIee-;;X83)}5An&$NS# zz3t}q&?eC2>~Q~^P4Wi)WdNHQZ`AY|ZPfWT4!>S;Ig1Ui2)~i>SllMsaoF3b1lZHATGqO$V0}9N5R{+2iZ={&qwTCc^*D&dPtNCezkH_ zCH222?1%F~Xo^Yf3F{m%hNOZ(qtNn}L2e>b^k$$M{04YFN*;*h4oRHy3I~_?ril@j z=QJ`pBWTnRUppAdG=g-EhZy)i!uglE;1C(WMDKgY#Uk0=1uJL~atnfjghvU!AZTKY z{fNo;h+GGfdZcSatW2^PLQ_K*?)D-Q#Si#USZmIM2oeLOAfdq!id4;%Ju4K*2n1M! 
z)(WKT8R&u80X~dgUmX}$IID%xze?HtS`L~9u0byVYOMV>)NSNY5Ge>PgK1eG@0Sm% z8rEl!3cv{v;Gk6_NrJb`2M~Vp6cDbjTE_G8oZZ{bHoyS3DQ544&JzJmL+KPl#lei# zAj+!s3EfXeugwKReNBA5re5%qgXlmja4;2KFF33C-nBO=*|`c%p!$>$bg`tKqep;RA_K+4QMO z+k?JzXi)wgbT1&q&F(UuOcG&^>}Y2W-M;GUzw&U0+*Yo>T;#4^D+-ytIJ!q%8^U9i z+VY_z^}HS@0f{4ySU79;5_N5?pW5lX9u6W0<C0zqCa&eI|r#3S)z@9U&~KB?GE=3o@+S?2vjiN02}88qN>E z+I*F#D4(lmz6!8o*Ml69(!k=dUsE@V7CJy1 z1Xqs_2X!Awa$er!4UkM_plJu!Tb80iyEVV25*5c2PYKwkD-c6NDPWFzB?kZX6CoN(zK8oyi`Q>Cu9GQx~tMkX5eDszl+IA@wI__tw3@ z%!L3fH}@4^Ay&)%CL7@m4MobuFQt5KW`}}J!ITH0rfKQ}qW7 zF(wr5O$Wv1l8ZR&9ITfN@K@ZP7tmlj2K}S2O2Tj+nE>+_5x$UGytLh42EN#f!&&lk#?Fk%^Jq%@Qd&tdZ zT>bIK>IU)ttVu=cPcufnS!MLxO!s>iay&XM2VWXDCdx!+FJd@2!8Mp-{dy4s;%N}Cdys+0ML zl#pfq_Js&QCt5OYdnz6|yn4=qF51{~NLuDDJ#p$^P<%sq_u5o0q%S4vMnL7_uP5IWp2f2 zu*b=Rql{l-2?FN~M5xHJ;OM5odMq`QvrD0j8g&m>*C7&%gQNSVoySz?rG`ilptnkq z*}>tIpQBM?KHUmd`^vjU52wFM;)S4AH9?@;zDGsY`=-1hG!B)s4qp!_e*e~8d>&RKEX4>ufDZU9=QR@i=VB$^+xGM3G%HiWx`jLS1`$%zUmbh&LHWL0 zDWSn>CA$Q@h{=~tO6_zukQRYbS#-q;*-TE*A%sgRKDQ$Vv-esj{sRvN;>n`camBJ) z55KP`l`ofgtL9pFNi)^1J|#E-wzezN-zRIw{a2Rv)1YLQz0at#N$G04w%=NH-e33- z4$nq)fpE-A_tmcD9S6ra79`K@XDfcCAYae1R&b0O*=Wbo9r8@-%y?oFZBo9(a7@y+ zk?YAi5>*S2pkom}8WkAzyZ1Q8O)roC+Wy8UQKg9U*(+L}e0IUX35gRTV%sa$;rGS>T2;RKo@w3vnlhn%U3wu=jRi&cIi?9_ zWUbB2w}w`s_ajP=U6tt?Ouw=hGPdrJ(UzaD^>Ahx(<~)s=uM|+`y4cy7IQ04Sg#(l z?#Id1>ToZq)AaOeV5QH?cB30wX@l~x%4BU7IUAXvPxJ8xn1Wb4f7^bP+b;QP*CBJ9 zMb`SbJ7)7d4jibvuuf-(dT-5A_I`mhQF?vJ z+@>jiJRIA7dRl7=p?N$?-11JYqkHi6`Rf=&vADp0@4Z8U@4jnNRem8>kPf~oixJC)cP-1^a&`>rmiA-o8Yg#WM;=c1u%)UZE2O{YN zd09cCZk>Z2>tms&qsV`+0EicUl;C`3^qW%1;G^7U9%cVdDaWno%HMToXP29{-&_9j zZ3YP6&dh#Fb^aSmvT4(rTt|LJ$I_xEjlqR2b#5`o(>4fvvGzI0Il}Ysa`O~Fttw-c zTc@_VTwQ%F*L{6=*BJbjwcg(S?C(;sh{{WI-jxJVfrIU{%SNy?b^i)e1eUPy#pIDa zAA5;z+T~dN;h*2uvH}KnUk@9Ubv-XNdFV08>Re|PIiA1K<#0rvTk^bUf*@;emHp%| ztY~^y%+~C@8_}iv6r1%GunlG2yosR0JS3;Z+d+R*y6yqI#uP4H=lD(hY5fN61-2@$ z4Vw*UP*Pj{?Q9sCn-Mv1j3fPOd1e1_X?lS!p_{=60F4Hk;pMiRy5trN7HM13cy-4y zh3CaXYfU$3YEj5w@hK&&StbAUqn)rYjCS(Q^0 z|LLva#+MY-N~WCzx|^R)f7~myKS(E;qXZw6hKkm0!a#cXHDn}29fZXqwn^>z3mF9Fu?F3wi#-g)pXBl0-+l|F2@%z7 z-e@%9Z5d_z*zI8X@lHNwZVo>&N2i7RWHG+tpOJy`h0r_7ux|@T*INk=r|73wMB9@_ z?omAptg=@&U8nCYuDVB3Z}vNLi=L8i#M*35Z7(iyc7tP4;D}*a%p+`ee36MJEfB9x zjj--MrqO_*#sfPN%pzv9yVC4NA_+h&#pe{x3iIJA^;2VS-km+cRCoPJN9zA%B6i0Bp+v?$*2eNd|IkpwwLiY+)IP4U5^pkX8P+@PXfP zIYR#L9Y%cjvtj=bi?M5ZU_Qg2NBk-gZSv!bWy3?P% zpcfTQbG#N`sjv0YnkxAN2xTuA(BLO{53XbcyKP;`Nt*L|@;N+KScY|)Eoa5ybwKz; zlAj0E9;|vecV~A&@k0lD3MNqN=}T|jPfhqg2_L<<8vE8&r6VM0*GEFB@I;irLYzH+ z?X$K;zljEJx=7<|DiTC(U?Z;5h)T`Qr;gh|F{078zJ8P`i2i6z7uQ z{t!R{@dl_m8!7<`k?_#{xZTbzw_F#_3-LRFHn{VyL z>jU=#^y;X3IdA;7k!XuucKhJKSS5$atvrqLa5K8|&EiV4HK-L==1&gu)s%dz$Y<}& z4*nOgfC83??;mW1NFF&b@V%J@-}iQW2WRw2up#lGer~%9RrOsI!+?X4;SunFi}{H# zco=DP@UF*C%C`@&uP{=zXUyi?-Tw!b|J!E2!=#rymfP3!RcJQ7y}f>K36?U3>R6?F zTP&6XrOc0`LH5$X`C2phykwKo)xy80UsUv-VJFc=F8QeCL{izeQfox4OrUvtM5JSJ ze>R-oTj=$p?VkWUVXdcG?e3&^92ChxoZjJEBoW?gY1jg1C(><4e*uhh-jGR2JjEOa z8Z~@IOn~Z3-j-StqiFTD-f8}!737*TQMN^wm@?(`0o_8j2%vJf>Q_X>m;V|4C577H zk)r0m9yfrinmd2g`z$<)TP02HtmC)O>9#B_VtYPaKDNEY#?;3T;+1(`Stx~J!lmH1 zxDd08hm}M2q2^#NRgMoX6j+uL`tg+^-2K)qW8YteIo5qZh*&0Gl8W%R={ouK^yWIz zy1A0pP@7q}<#XmUgzeM_7dy&h{E~wjbSV4s=Xs-oo9t}^^N)%tCCcLBhhD5w)rc1j zTKy1MA9t=v8KJ*)^6+NTXX=);V1oHx<9qRz!Qkux^>4W>zgkF?54&ijexLy4doRx3 zm389+dV@1W&guPi2li|AAdb(oi5T)R(^NJ@nQxUNy?4i)lsEh~QouBtwdVV4tXvEm zBtcz|EgGn-6vAn%y;Y*F<+8pU@-j)+jnYVTVU1M6wEQ)QaC4oK^uzu51hZqaeHJB? 
zv`-w8HKCVy30riuDUIqn>aZ#~rIAg9{7CJWWLk8rQdC`*?G@dhKT%oJW-m)u8kY)n-IT;=7ngC^MVQ%)Q2WUH&bWax+5wxrD8vd}K4JhUMl(Gw zui}&jKc`&;A5QXSaFuilGmELkCpZ?s%d9LjiVBWTXQ~R4ptvk7m%-LELIQ9Z#G6#H zruQ?H{?w+sny#lD(q{h9e0=^mu3lGSW}P=slY1IE(PWmKzYMt0KmVmlxc-VRXN&Aw z^CDiCpZO{f6Z<&&jil3J!$432R=EFzVs5-FcR1PTYHeF*_r0GCQAlx5(dLZ=YQ_5m zf?kOuO7V1-ZlUAcTyOz)^H{)46}*Z&56fnI^FK|i$XkRN#_{p~W`15Prr

j8N7 zqkVaKxz=j#_V$*?QS%={=~4O9IO4rqY3ZK))rRc_Pm2~deGsbN2DI(zQjQi0bDige~{AVh^?z-XZ;_qj`M@T0}6?D72f$xz+on)LULERJVzs^Q}u3VbvG3 zRh}zxnCZG^+gP{==={VJDyLSi6cM%h?HS?iPp2J1j zS;d$HXYOAqMEp&6Jkf3+nSSvf)NbzRaFzvt;={QL)u zq3Zdt9|Z;-;ZNn{*woAjx9cY@eiU`MWn80NY&H69BGY%sMF4@}T}wLe6YgZUCVIE_ zuCFd-D`vw2zAu@9q)e|$E4(+`LkF31j0;@8tWBkAO~=qHFdMWt-8ALU=Oq8idg)%n z*7sRX_9`qm7(==#QOmNA71eSG@~=!+Xi)&=nc4z9*NJFj3TFd9T4nT;1)sqbdS)%1!w%D0GrT{%c!*rk*YLH1y2Qi1FGhjWlP0Zu@#GL z=EQrg16q>d3?`R-D1KJtCQDe3+w0f%R~g60y|pEuu+v!90kMp}M%ThGosCGH869bs1_JvfnvC&t0rLF!d)@GoCQL zx%3BQKOO&4u!~4bs3=Y;yi`-I$y7IW%3Wk8UDOXp)A1qde>;D#mx_{7wbR~I5A*p< zyWtxnBO`=;42wb_RzKr~v2m3;)iu%kUSU_`s)o#DF;usdAjd79)`w8fF-<`?;GejmKJPjGhExvboWrHc>86$ELS%*-SxVRXn-v^^_G0@iuV`Wh4+u z9A37R-sBD+2MMag*6ra&Rtjf^ZglAblHVn+#$$cip4fea>h_07j%e}8a4qMI06ms9Aq9LsP}GiN z1Qkg98#2@)$c=v0gpYs$^kG^N3_NFHm-wH!G?ulsL^=wN7P2<$$lPNbVO&xs!B0H#cqEA zrxYorr)+rbI9w%s!D{*UEsxn|*FcP{%7mt=XQgU&-N6vCVnH)8dtlPfFET)N#j^RQ zTCG>8SXSNZIj@ZNz*-XSZcPnYU##kM{=zT6>kAc1>GzMzQZ{~Bg-7=_tK~PBb5;aG zBMoeKiRM0Ty5*Mt1i^m}Oa#A4JF#t}$y)SIu%N(MtaGc z!tQ4nEYT`$y6(@ZCXNajJwI%vwT)X>&YVt3#%g0{>pBpNB8)Nyl78(=FKE}8) zOBnH!91Oh(phKg6qQn2c17p`k1brUw4M-le`V8Nsiu$zbws~VN;3&|9uQ33PI(Fp3 z))6=gZAiE0F`}}-l-5H>Ej^|UWEFo)}(xtqZhtX+r zHr0F-y)4DWA>ph%{gY<|{gbv}TytIX$%a3> zEZEDBqW`@bc2euN(fNM%PP+GgXpL4g>vs%I7CX;V{y$oPfMjA@DsZ(mvF!$}J?7qv z<<2;r5eDWrZcwLhZ&;exmolAxf)?q;P$p~iG2!f8Ke|k5&g}Xaiq!U}47ntBHx3vX zl~onf9wufPrxG!MP&%4W6bk6iP!#r;nK-t~vfz#)f-X`R2u{pxgyQ-!6~ql->-tpi zi;A|r)KJdvXC0Mn5SU^Z=ABT@9~o&eGghCfEXXeJBwpD#Kz2g}!-t_z7R#F;nXo63 z97}$GZmsZK?-|VhRMNYPDRya)CCqwb?I2vAfFASNxjvWK-&oK0e?sMTGW5SS->1lH zF0Tzgj5T81yE{K(Wc8pex?u z@Nz3c!a@=6)J7bHh*bJ7&vaOY@S*!#gD*~&h7I2Yy{nFw5q59(SrIX+M?3vNv=8-OW5|CvpFQF;x%{#_ngbr&Q8l>d^`uP zyLM@ag88n#uwg@!5qA22pRfQsWt#0(uX1W~ah8euf{W!sh%G)FPLTS~1+b@Ip(i(6 zbg$v!YNZzKyO&dj>*gVU*MIjLJ{EJ7g~kNuEYFOM+56onGdmB!m|EkN7%T`D9MlwL z?4*LksYm*U*=pVy&dNFYB|;!oBtSs$69+Rnst+>aE70AqJ+#c^pBm6B_3~TkXU!Rx zQmtPG=yXtn0`VlW7$A)dX zRpn3}rzAq*lL8KU&sy2~S<1XVG=*vi8$9H|5;-Ay7TCUZh;M?zd+T9b9h~_>-D9gR zbK0!_LQKizVDD;rU56TM3n)wLZbTI4&u z2+-YkC}%}!jAjoA>Ydf_!;?v~W$o6Yks@ZrnnMA_rDqFbh z>9)(|x6S8ktI1^RQ1!CE_=Yb=Sk{0H$Ak5{fyE@Q(`1{aV<)`re*A`=o|xdB70?q{ zXe4<^@|V2Hn;rh)!84(SWLm_h?p+H<3}!b%7SDY5DjE$5dk6be`&+wbeb%5t)kkJ9 z6=6Bxr%xXxWy3W_f6-`X7k6qNHw#Q0{n?_%JY;8{%3Fmv#~jimsp-7GIKum}DxEC5 zHAVQbzyIFrM>Aiw^3iC2Sm>+K4?pe3k$}+M-9!klCz#C5lfL3l$=Asw9>-q zaqC^0;1%VWt&zo1<(shqvZEHo+9e;Qf`I6dLQiV_`qRqYHtJ{O;mBn~HU8MAIBtHD zcgjR2L&PDaCL3htOqn2#jhP^#u}RN3Y96zr4wkgWbo}ViaF&NipB2MP3UtAxp?m6e z-7s**|8ymOLZ*lmiSb++&3fCf61;vwwV&zmdhFulYG|&AdhE{007SLW>pRKr=Fa$( zF716huliA?f3@r4Tvj_1Ey{Vx0KitHBQ$h18N4a^1~*G$?mx-e9S)T=F}?UcB&J$_ z^b7e;q_SBVOQY5%wlMT7@!@3_@piEDP-2tCaH@>{_1OHu3zq3ec?oQKGaw$IiYEY9 zzajWnrjNAD={`sp;~Ah(=b(4EI{WyG{bly(*u7bTH)dtpvV&9Hj%So+2M=C3x%1~K z<|4QD;_7;l>C?<3ZSscAchNQ-Vycn$*VSLv=ZY-?sZTs}#h3p!TG_ho-b^S^=2p(K zO(hD04RK}T?A~arbEv9c9Z!GM6@9Up+nn_f+EC1QWo~yk$(5<c3*s-A20$$oUMpRkX=2~yg?brZZP#*ua#Ltj ztyK~$FSoF%*LnV*Dn-<@(kp25MG;?pBu7nAd%VqPo znRvDQZI@sq+;RN;?uw+U$@uBy(yi4V9=d2H*!yX`%3+|Nw9$73J6byurm5LrK!plFvaRZ*+UnLFF&wi?p84E9L&-h@8Lu zXhRKh{1kiQ_^>3 zWv5F$j^EYyUHdL3md@k3IX+b%0$;)u^~#~T@tR6#?8S$hGDdsx)bf@4RX?V(+yY-I zRK4wac8>gB;~k1$z2Pe}*W*YOeC- zY}%z(V!>xN+Y+P!+|C1S&P+dlMm3^289B9eTRp`V(?onNq>yazByRh7yLbnLOT~jUa+cthWiHJ_PBM_s2Ekow#h#faPPs6(ZP;!H|kBoVTR9`T=z2Ji~p8m&ilgCU>2dZRlIGcuWC_E zekR1KPvv5ek!^fl0;BVSVXV|_MXlI;_D56Cn%_s_9|7A9|2v|}Uj94Rpo#ai?rA6I zr}*(p%=YZJi2+4zgOJ(S=u=HHbqTvD(#J~4w9lE*2`IHMreg;h)V~z-prtm8x8IW- z)Cdjq_Y3XkfEk8H#{E9LtF>VpWLjGwL-sg92^c}?p8PA_4z`U(gMZXA^j;(GH8({>q=WP@7=LKkv<=i?p=u)X 
zE4KcY&crMTy|H2F=MSnX3ee54aXVc!;N5f9pA~(*?}7%aHfuuVb+AiPcxkb&`-iBi zmalE%%fP+acE|f(iGcs-+~CvE(UF&zM+`y&i2Zk#+Md#cJzQJk(QU|htyA6@eKIx8 zS17nY9biMaj+2w;o*u76WBRxd2^)1w-X3rmNZ04VDVfKL*ng(ZM)zp3rOsx_`*MFK zBZJDu=IH5uOB5LyIX(-4A}yu)+|Nz_mTK#|Z4jhwAxw&~uN7U>o_34M4Bx0rNd>jF z`AA?BH@J}ZkYJ!xsLhXJy3G?~poIT&qC<%B{%pq8thZ207atYQPhALbJhkARkv00R zKR15%P;dZwY*|~`U$}lk$T&vQN!fXssRK-_4OO8UQvHJMGhoWrCh z)Nl<#8?mQ#M}GQi3o9jWn(}H+SF8E=TR5%>mb6g))_V8vkTXGlDV6n~LvW&5R~fRZB%2 zu|)URmb>t{ozf@ob)rxqomMhN-OAir9c*ihi({b4X*ILjIPNvM&whcqY&hDV9guNx zN{1KnXnr*PNw_I#?j0u9<4+o2Qp3R@U5JD!QiMlUxGoLv3$}Q7`XhKYy5}WS)5FCC zg+S@eTV)kbW9P1zC6KNNf$5}K9|=k;>#+4fTzhbFwNg9HsAkXfv@A$F6{Dt1s@(Yi zVOaI6Tt4)vV@fiYmRB{V?Uo1#mp#;;WwWIiN7a6pPHdzdpnMA;6m1^&FwwSUgBifqm?_Yy};FV9au%4KZbw z=5e|rae%s8e;iV#oFLT1MU#$UzF#SY#@9dYa-i=OnT+6EKt^&Ca+2u5mp*B} zC*ta&hqSj{%(@e19Q7#0dn_)ua@4d#>ci zXlT+I^hmBEGQ-hjz3`B3>-o};G9>Xdl+x^(7Tpa7%7tQgQ!;_wy9H6MwXuf+kgPZz;SSCY!m&tzRE=6`Sd@V%IFoGaJ@m#el! z<79Smuq8^$Ld`mm%$8zyWNl1EMX{@P?}*pr?{i6Kc%*cxWp65@pIJU-d@Xn^2tV6B zHq>;X-6As+?6iiN!SI&`>*G9*-9m7645+ zEP6GXq#M-r9fRwk zj1(Lop3j2w&KrITj|LMdSR@xr^Y@m5@ZtwC@eEiGG5z4=&CM5EG-#2g?`1iyyx{?% z9pLX;v@Uaf8=Lr0-^GiklI~D^HeD3fg&RF6CQhFP8&wc5fMqHK7IT++SMgY7S~1*vhOkR zm95EURjbJ+6R>p%`wT)OB;>c-_zjSGRn*ij-S~hzw}e9v3nD?_$*ckkt=UQ!UhcyG zvo9z_kRsw`3KyfjdjnW%JK{;g{gbiG*M;0xNwar>6o`6GJVQs#j8U$wls?iQqOp%EXd~HIEL6$Q{0yBrT~!XxFUX$%+2~|$ zur7*@>CCWMdc6mr2O#942A7tG36acvWozMN*-8xy^74kSA{Hl5F8y4f);U(p?%nX$ zscJ&|5-&Ud=l`1Bmy71vxOjL1cSYj)af2q0D98;yMl}l)6R}+e2fVR&F(SFYm&SpY zqInc6{3+yK^g>E9HL)=jkxoKpjEobHwjRYYMRfyPPjdci-2V<(0fwxf_}|w6KhKPO zWlNWyb-RdEOAwdJ-^4z;i}_5ZSgP`x5gel@LKa7Zd9Nb&_o?mL2!#p?A7MZ~u<|=| zqm}%WVQyFqkS!mHDMu;e7Ah z{3{z?p%N{i>4{g7@pl#G-hlx7${eMyAdnMWrufgwW!E|z<~OTVGQ}LvZ97j%(%$|5 z13x7VN=Mb(`HL(1*Qhak{22NUckARtHou{9*~Wq`y{_&mmQ)mY{D$zY2RYz;49Hr+ zK*vc^BuN97uv(RKt_mLcf4xx(Rp_w7K12Hp0Tb<(_3v)5CP$HAENOdA-PKsjRv4>P zng{|p%lW989ZkQ6m(PMO!$(dko|n`-J^$^{-bI;(EKzHXCY zyYkzL;GvS9nPn?qEwfqZVGl$^tKV(PX%Zi9b35Jj!T9maHmn}QMcbw3#-GM@!!l+s z(ym21dK0trSUKgLJfpI7`@}wEwkSIteYvO^O*jB|pgaQ73uLECcK(_hZ#E#Aa+kP9Dh2ZhxYV@ObK$e1Vr zq@*Gp3@S0rBz@91Ad;Pi24sP`QuYtgFeQJmp7CUT|Cz{T`G$=a;B7PM_bCOdw8_w* zSRupoB`y0(*~Ji9o?sOOEBd$L(q+3~xuI+yjS>CxX><4Gz-X#>PY(U)M>XS(V*AQB zgfpR%7CW5uE{VHis)FEp9DA=X<1^M**!#e?bf+!%w76Up%)CwB*tZ{{Qk&<+4x0Sd zUharuy@2 zUMKes8qo=Z*K>QngI__mV!1^tK}6Wvq9NYEfnns7$L<|bfw}rdF+-4m&P0M&v! 
z=-Qj+LLyEraYrrjfP+;vHpzomW}Wn()?K?O+c!^hWii#HbgM~{d`0qVZ?C2xCZz-r zhywF`wWT?YTN76eQ|6kGMWm#6u@)Dk6n7BJsf8iUy+-Jd<{|OlaN*yhIuY9KCv|-y zId$mliw>iaCMr*U>48qF-T}`I1b)DX9o)nkd_7$VDUqgEZFazKllS|quzD_UDp%>p z#%fxF5WBJ8qb?5Jex*-e{7Y}WISG%sP{z|Uyo}=b=y8Cx#u-)5@JkltiC{GCeFoEx z5|q%W6QQGxw9r(v*t%)Nil`dV5vf3eCC=7*5eMXesg$Em!e>TJ(ar94Zyl+y8Yir8 z!DnNx)bRG9fA6IEhapjm@20}zp=$n?+ffUy2~?o(>SX0*N|G;;!tq>T?)0zs_*eOLtYmfS7PdrG79=V@Q?juX}8BHor(VOP%BwgY5Ji{bXy{A_n2bi z0o$RX=@G~@yi08X5)uXqc%h!Q5KuBf*O+_LfryHH`OV7K_>YRJ>3YlyuhD#SH?{mc zslO@S_2~Sjp&m*yL4%)udV+^fPoFX3je8H~_&Kd|-|gi$^DkJ5raJsJ?hk1UHPI|_ zzl~tW(phcnid}S22Fe|6`-oP4-HB(pl&$=dSQxRDaJl=*PG%Jj?j>Bpv481sY4!qB z!oU7dsk3%yTXq#~ZCy};ai8aSttC~$Z;X7;_p0!Rx`+bw6;zZxl!YP`i?!cz*s$9H z2MWq24$C|%*Oh+B5_Y^5aBM9NKP3F*+Izo7l&m5=nbe6YNyUJ#*5bVIhc%DW+bHQa zmb>%5YV&@>M$hb>z2;FzPpfNk37)L=wO>4IrbJfyB<+>BVJjn{>;oeH7t=ce0kVP5 z9$-L&+_wGby;y+%Q@n+KR6$|Y8j(G~5u}(H?SXrT3f`Ga9*tGUT8k8fSgqtcK!Xa} z1#~-mI~_*L1j(}n><4d(O|Kqz2@sZv^6}*T9n7AN zk9!z*XrVxoaQ&Wp8~)k9cr{qUmhxs;Q&6nO-qSb{7o(mt_+P)n_@AWXP&$9|fEVVU z{Y7nHVoq37>po9L6;Cvj<61=T^lgS`6j~-RBb46|{xxFgDAey`@}q(dBijfq_m6RQ zt0vl*38)|tf&AlEE*XcKH90Vszzz7s-G z%Z}x*laTc7a^BG8mR{#f1k8p?T{7-d)oUP>VgTKTbLj)62UcS-ht_C<*Yj@T)-Uwm0y^G4neK^H*9r29!_4*#Op~N>7EKsDt(QKW!^U}@zeLvY$ck}YO ze37;3GvvWH<}(av!26L=1A9v&?s7fZi)EhG&0bKHXJ)^sY_ZDNhnR3=Byts7pCm_{ zi`(_!&`7j%Sa}|qDmNZ6d{|frHmWv6eXKl@AGo0;Rq%nK_0n?i*R|eN zXL*O`;qM|O8-s<|MZq*_^YLZ95-8-ZQy!BxL3XN5k(p)(fBz>xd2z@~M4{PH`r9Rj zJv^{&6NGt??CE;(`;K(?&=TuUS8S$yW5gx!UW4A%t8LHq?JkQs3c<%oJKlFVf{uZd zvZ#W}(z6!ZAvP;DB!7N!?HTR(_kU6@kA}d#L*nuvmw%U&=jq#M`NRNTN_6BM|A2Ap zEspYNlY-3cwg{`|?U+)t-m{qL%SdDB!)A4MPZ>e9p6SSp0ZJY6r1njB$QUyTWzfWA z;I1=M&I=-ugkmZ_>yEuhVqf34r~PrJ&wzWU(?=?W<9hGX@A~GT8Qb-2kn-ed-| zK1by)*XIEWmTg&J{gLv7=egf3`FJ6h+&reVqw`jSOf)6yqxW*bNlXVeN6ypKAS34s zFV-Q?SDn|`9Q;$wMJg z@Z0~DFw_SGKubtSAO#y47?31&SQa7%>7{8@ST(=>dvJPFv*0oB-?6(;cWSsW-}pM_ z?Dp|fRW0+8STEIobk$2Fg=~~e&e!~S6eN85IevnHZLAm{ilX?HSw-@Lv(9k;@M$U!C=j-gzs8y=7P+O*1Cy7b)wu#!=2 z{H8D)gz@2#h;D|jLhmPKfc$O9!xFC zT`I#tpqwnhmLopC6j51bON{wfjEn&g&4Sh5QRmWXbsZmna-q!lBJ~$>iP~PMRVtoh z{Us;f-eCrkf%QIJL@ZfCV{+UN{Bv^n+qzDh$KT%$I@_x0e4$Sz*dpw1{;3NM!fWx~ zKhsgwl0}!F|6$O=0>1q2yCp+QvBJ3d%#~jp%FFu@ol0`}p?%-?M9KR!CTyR077c{v zU1l=3IZ3)rV62YyM(8xoTILl^>i83lt=?#1wf#h_ETteG?lHqbZS{$cLP*H>VVL(XVtGt>or0Z-< z;rKmY^Jx3sRQxh0^ORp@e{gj(ZQZp}1Xq05hDc#npM5bye`yx$`}X2CBjKCXN|6Wh z6WbS}CbNDmvn6wPZgy`ik+q(%3_o7bOyE%=(BiYQ2u}t11u8ZRg7x^#OsZLZ`#5pq z|9t)JI{l=%2iD~CbPnf`xgY53(UCy}fe>0FUsqD%n#>>XJf}fIK6{M}M&NT6eGi>y z?rkZ4*TB#xpH=rr)SMx#`eSzwayPSelHr82 zlsD_$_FR68b&9iGA@WD&ik^m#Tl&U-K=CE;k^C8=dZvIAfDn|>VHhMdS`tLkI#DZH zExab&*pFWBbf}qbw>uO4O;oGjW|}Q**{EtSkq0?#Yr_l$8)85JMZpdfAT}#QEuSzV1e_ zzl}ycWBGF?V!2rU&YgUw>rLOavDvqU^L@Qj*z^oq_P0I+)13egv?iD*lV)wS4(95% z_a+d^ug`D!eW1-718Ar6%ObIVAya{Lz#GA4wWZM8(Av5ZtT6$fTX7=iTlP1#UBeE^ zlQ~v9Rf$@wJGw#{rz9I#8JP^-}>0%2%bYUfhWSN|jM2 z_;={YY+jc%sJLAT=s9q4LcEUFkM`TEv}_$niO_ODx&2sFgseOuPbIMAbHiy_Zehv@ zR<`#l9U6=RZIrDH9C8!O--JP-y!WR=4W*lysT?LDiNDd#fz2@5)#Y1?`-^kAbH`8d zM;)7`MC$V;QBjpn(lpdUbUVX{v2#c>BxrM%ekDm#{$_w5&PAlkHlv_i+JrJ%>(}|T zW%>0Y^J2<=y0A#_T1byk-$<0 zEc2J_I`Y0uAQB-pD1xQYQ?u4twp=pr8G)(|A4)N#opEzD}mf(D}98Dz&HH3 z!SnKk^crkD3F7AI5k_Y_Xl84=1Wp6n39bkzY)Mqvi*Zvq;AMl`PspAAI~~l z>Rm%l)|!LJrW)(AZ>AN8zl6^hjVUE@bMHF*+|cTL7)2p`vr3BU8Rl6^nR);hGh z+yt(4=ZS9TabiX<(3MQUnW}H)=igP#8u>lB{)Lwx?4?CJ+v^G<1WD$&J}(Sw+|#f$ z9%9f#4SCk!DzUnDp5a+*gGto2R4pAuR#9Znv&{qpNuzq17`NV2&6(Ni!Ow3SGZ{;L zd4|P39hAnRG*MR~&}rM{xjoOyqMYzVrWg7**b~iisZ_aGJPHpM1jqxAj*kO*c*#xA z>N}v^0N;Dim2B(lx9+=Ehmo%YJL>LWN%!IQ#^u|kU&(;|5zzBh~R}J ziQnJ#w&^y%+uSY?@gF;E1f)dMg4`})rA29=fNEfs^{o6$H8dqukTfkk 
zPgIdrIQHSRc@R7(>U6^rrl#Qrtej}-T9q6W?D?>;7WuJY!S}?boRRB8?&0jgebM2 zsu4d0uX^LYFcV)!hx{}9d2m3Q>s3F63I2-~J_DUsqJCm>g4_8X`D*y2<@*8>)BTgg z`K8Fba0f*QNDWi1ylZSE9OY0NRQOZ^aFlK?EzH$kZ^9W2CN!OwnJo9iCPvBkv$;Rf z^6qc;gqC76X13_iya9pmJ++Y*A|ZD>3+9dZiFOMuUZiz3oYcXbS9Exb*T1*4v@njA z3k#jMW&KWsE#50wk5=IPF6S-T?VbwJMl6w~YCF91_Izs`QWmLRHnT^DPC~zv)F~A@ zu1rhOU5u}r3!O9dm8Yn=3<_j|^tVh8%7I~@NaG<^U<3Q>LzVb#T3R3^-SCzLx8q`Q zt#Uq3K^7NH35@+5Kn8r0(j#D2=0EfI*`8iS$_7lL7SF5oc&mL6mX;wUf%38BI<#u| zo=#`7*v~16r%n$&dR#QE^zUBd*R5Hu# z@zeChs5ivfnR^1gaXobwKSZTUzy1-cHd{aKc1cqiyEO?9o#wnqeQJVtn*=W{NCY;t@7pVDWA*sqB-_paIiEG#t7pV`<&@z-Gc{N51+Maq>Xn44Nd)A zKGc>nwFOD1*4k}P8J|J~9PIDz4)D&VjelO9UBrcXjM`voL_fMij)($MS>O9oVCq*&j+DWntk`O3d~X1M3k}u zT?;rO%;Z8Tgpyl5?+i^B0xa^yeH%Hp?x!<xzY zsz3dqng1ft7f@d!LAM%h0-Vh*0aMiyp^evD6)`;bIEe8^Z;zIjy!qY7J|_gQD#E{P zAi~gbuTr@^n6K)2!e&Q(r)h2SHH8K*zEMt}=L>ug&ZJa`==xV1@b3rDquK0G2zcbg z5b-}hKgY+%UlXEOHVh370frpq|7siYGket=zZhvW*^EoyhNxs$KhD22e^WIv#4w$g zEQ}YiRTxdFP}`Rr;cb?AZ8t6O4qePTrxJU`39r;L*X|tgK!`SSp~F)~*HlAONP;J7 z=0%wDp&=>AKPs4oZ!XzYwW;Q(8;4aKsPNonsbSe-exttYC*rW$gUjpv*R8eW1^K97 zYT_U-5NN`Va0FliRVYB0ym@ln$NT~DnTx(|VW#OAUuTz9ROO3az2N%52Gi8z8ajW6 z|5!7G@Nqwd%m*)B8a1$T$`kS5O|InX$DGCe zV(N&62C0g9VU0#Jj*PY5bRP|1?Z-QIB3v9>>8*?n|GL%dM%KNuM^q^4H3Q0pPfz*= zaGp3SloYEKR^^6E@IK$(a+1HkkcxStl`fnJygH8ah!rZ7;gxd8l_W`n@UEnUJpxRP zl=PW&v{cF9Y|B2K+OJ(Dm91a<8-SlQ11!%ht$?zd`PJvky(0{E>=8%6df=Zur=T1- z4uMAcFWdDF4i1jTXxN4?&CSgK%mnC`0MChC!x31jxo@qkouGs~bTV4-JmG>!%7czp zq$Wm#3h>dhZhtG3qDt7W2w=!wD&n(r^(8f$Bz{Kg7htm3aRB8i5jR_Iy=sn9=qKS~ z9E4!XdwQcnc|1w_dE%NlP$}vmi?9ISq|*MhOHdRE{&Pweua>^I}P4htFvSr#=P{)YX>H)rKx21!r8Xs zswLBpHsuPihE{;#ZL})6>w$ppBkbtpV55whI!Z z+PIsozqmz})<9YH(3bi9XW5NL-g;*;LD;chmT2~_Z|(rz8P~`?Wm>xbj({s14wpJ1 zLXVsDmts&Y-Q)N%lrNmzOF!49teL+bvrD5R;x%*BV#`s=r@rmT`fi^$)IKa>&P@67 zHp}hW>go*}QZ8grLp$E~&BCNToH48dX-s<_aG~iO6*}~9aoQf?j|0jrnl;f8m&>M$ zSFLx`=8dpGEpXLP7_HHeRu384Noh&n2Blo2!s7#dm((ozk`1^Ju3>0;~FX%or6ndB>2uoD(T9de3;m8n2yZ08Ms zZBFo`WhV#6&vE~E*EMNfA6V14IMjcpw5yJs0$jkjFF0LL{VnqFpqG#8?X1 z@P6f|NkJhtT&NefJ=d{MX;5rYA;qdX2-Hr+%dWf4YB$1mm>G%J*zb3n{9GK0zk5{J zqs~`6a&P9ZEn*}MEK;-M#j*7m-+J&%hHbLab96Rk`X2y;L$d<_WYE~j|66#wwv12s z`*lMI`}Zgpz+@NY(9@ZTKyei0e|@UXaOE$t9wTF{^p|WumLFntyJWV425kseJhp0l z_)0>}@hF}SXd5`aPADhTJt zUp%xu=OHOVywl*uR@*AyWmzbvxllLlp4BQ(w)fi#UfVw(SiC>{FwJy!=JT*T*tz@F zDP0=6e6>4O4yCndG|Ua>*X{FAV}E}%pW5m*r8c^T+M5rfgv4`Q$yGGIc6SSXqy6ZA zw_Rxe`K?tH@q4O9e5#1I`G?;h@2 z+RD;$u^0<|-ecwLvP7u58=y1}g?Z4hN02f!Tsrjbw2{W-nCb+|O;eDJR_ZZ`(I_m7 zUw4b=!Ni3(G-Tx)fF+{waAUW;OE;-hLP3d;AWdhM&4Q}H*aE!kuA$uGFkRu zci9BQkYX~C-+qzBM~jPU0{>O}h$8pzG68T8eRE%s!6BIR!^}{R);a1_kk+ZOjb+Bw z&Y#$3#aa5^U1m4;eoa3~L^QG58Hd2XH*#eHr>Ofb5aHX*T;_u>SnE&b_ft zvzXMz?pbJF1sv&0rE87mAm*(`-?tM=BUdDvlt-RDV4vQZPO;V2UjGp-AM4-a)YjV` zrmeQsV8Z3{e&%e%6t-De_qATql|%y>sQolP#2fBSkU&YF3@sV;adW>3*c!j}+ScK` zBq^8W^KM;5%kHWRj7$wGV5YVomiM(CnVc!q&nU1?<*a@lKCfvP%$Obb!%V{7Eg%SQ zyHuE;?BD{^PgXEVprYWehrS7@?dN0?TbuKFDW;u{dF6`Yxytw%yI6-TX@Ai^e9URn z@!Rt%w}c&+`5`6P=LT^nmtH*x8ngq(;oHxa4Zx&$;V^AC>b(bE@`QA0^zz7W)qQ06 zu_j9E#1GnUi?Ba0!2I10)|UQC?~CBX@YPnIsv};bZ~Zx?FK~3FITZ|7PS>+f6(Mpk zDe=Uk4!7?MsHxjTZdpNSBy!QCYOu{y544D!w8(4fW>tcAv3MUokvLroSvb%2q*A6J zcV-KmFYaNwPhJp@<1n30dsfjS=X`Sw;ebk4Q0skmnz<+zUJbdkR#*Fhb^)#w(3iLO zo@$nMbKRtxtnV_+h6#aX5uz)j8<1akpF(RJvc9zjKqdg7+u=w7!ZxGsHdNb^9A*h1 z#@+kxi0wMppl%VcTeMw66#;jor{j5t{zcP(a@yL$duqITzTA`Fu0tB}OQ1@G&Zp&V zAAE)%-g(imih%L{B9-pv1OqK-R$MZdYGKI2l=4)j)2MXba;R!KF|`*kuFn5%&OlXH zRaK((c0i1bRZCQn!8`r9QLc0vvFy9Zp&rEkVKalg^L^Dp+bvboYe?#0l75sfM*fU- zS^5@_(emKW9VRO4=AM@=m7RXXx705cGNl~C=iuUG6eF`N0Sxt#qfbZEU#_1rLYTO3 
zGnNOdpRF3vU(=A}ew$I?)P{D^8p5o!)ar40}RO9sd1(@BNE30r${m&ep?5 z+Z&MH%`8~>u5}SptT+BYk)8iU@9oD9w_!uzg!1i1UbI7s6}EEqCIXXuwl91;qhs;{BNYCcivAeEmb9jr@k0BAAS?h z*T(dUKwL}Q=}u$qPrSGOO8cco#;+(SCS!u7z%Z|OWb#>AMWa~d=6TA$BmSo8?ywjD z8-_08&ZUN9|8*;s$$R`IHIE9Ev-ju6MQd%j%8C#E)-rErMLAxpVvi>t7ST$^eL+Xc zVBp7f@0I{{dUIdHNOmbsutZ*KHy&1if41PRuD+eWxabq#uu1V`i3S zoUUddR^8Qqu)^wDzy12TwNX?f+86bF_Y`R4rPoL&65m9miE_3hyx*He51n(&`(cI)H-Da~l_P397Gz?Y@92c! zl)Sp^d)|ny^-A<4(9QJx@5T8~P8GD~UOQsd+P(eyHoNX%xeH1;8H{UZ3=WrznF*jYAOQ5}x8o@Lq1yf#y zUb}wit5|aug@F?ncdC)R&l?k4?SWsO?>pri@I(*!1aZ;{#C=CBwpxj^ztXoB0?my_ z(hBQrfOz&fd*VY68!Z>Xjose!m^Y7Nbic*-l%`oFWu5+Uoc^{?cd2%p)F11y|K%!s zz4-}6@I0eGKlMJb->As(JS9JKXgfK3i~oteU}I`&vQ!qxok)~d(;cOxad?+}bIXTAQ7h5o6k;bEhp zq2b^FCX4+pr%Q_q3qKdXAqD4wa};Ezzp@$Cr1o#`?(D2|`o7LpY6J9(Id$eTi6>4U z_lxU&df)p`1ps{^Q$V2Ib2U66ZHp6(OL|T8e?zlUj1G;iBO!p4XsbqsJ$2|jgC&YP zitr*X%2AoF^Fg4JF4Kp&@dog6btjL8v+YylaAs4d-BEN+InMEyDA!V%Iilu|W)vV0 zqCUav{@caXY_DWp_d~%BYh6Ox*pv*lS8+v~DnFbZARM_1&S9;QdSxMXVXrA?R6BOk zR2Q{_telYYDsdU@H=eJdFeuCy^n!J3E_NXM=#8^MJp=Mjnu(vYT1b5rZKXpBgWn?- zV$I@_`0oNutz5c+L3Ar7Q}hC6{_22}iLqHdEOjAYsb!CVj;lVNwZ+zFp4YnI(U#JG zkmc8bFo9oW1W+yS@?tE9+I(HJn=jJ^mr|pNSQ@hrBOC?_5ckO&Qw}^q0tlbP&r`~& zJty%xV-gh1B`USu8;%Ge7GJU)o49_y7PE#p;FyeVdnyW6M|>}Lw;@v9h5Qvq!1Gj;No+BrSi27-S$LBdc+PE-J ztBc?FKUe@>jr71DJRWi7B=+lfajI%(*tMqaQ|c+RD@Wx(m*w{%E_GjC*Q)pabYFy)!BWglt^9z3-5Fkn_WLor-OA(< zA@4(^TFB-$oI&(=)s>h6!INey@2+WGn@_FTXl}kX%AqIndytI%O?I%lKF=_3I^-QF z{PL+0rJ;rJ^3U$e(^x|M&Q?b%C_lLId@T2hVe$)hq2kDlyx~R=% zR4LVa8pb1O_HyJx=8KWiUw`O|a4}Z|F!70#aD2;UZ);2W3fs1PW^P)cF&s4?Z$#}c z=ILY#)_)tdFuz}u%oLvm#^7o;feO}@9 z;?bU0GNxeWa>~(8<6C*|EgHz5&ujdq)v}cyyKKGiH3N=zh{ABi(Ji%(d2gY&pXn$Z z%(vUm1Wn`0DWS7vv7S4P3o{^WeX?aa&f#o5AEH&R2I?|l9;5<3rH2wRQ4RH5B3bWa z{C_^7e>TU*!2k9h{%(~ZKNjOYolYO`?jXcR6qt5SD%n&!$m0$0=z-}W3@uHJ7Psfg zLDW8LfptsF!|7Jm?lfd%;GZoqnmkcN*jY?Y%y2XRskb0g+Y>v#FFpuSK`mLG4ndSW zzv)FtCH;N>M9?wOMt`N$cvVATv&&sm)u_8l7su9w-%Wg}6DZxkCKoZ~1m|Rwc3@Pk zmH}Gu^3xW-w0f%D!Auc$rP?5{jgs8EzT6ELGcMtws!%C_Fp*ZbQ?Bs@2a5R4?nyhw zNA^3jbU(0x$Tw08{bJzMZULr;Ov+U z<$%BhFmXm-nbYGw`F7cskk>`0)+9_1I_GWt*el=lP@(4f$5h-I-!)rZc%79-qvORVGVhpoA|My(Bq5Each*1P(} z%d3bYqDKd<-tO|cw&M;B@9PBZPDR9dz~2L+O&sj9}6qYEN~WyZOt*2j=CXby=w zkX>1JKj9D}C%#FNeZ3H@gpAye%{7;AS!O3&)0J1jGS@IqPsqfhZ~Mcj;X4|O=ql@0 zf^s?wJ|b3(t?XS_<4F-Np1fld>(aPEf>SmyX0CYEA_CRd3L7{==s_@d<; zuMQ`qxLi-g9nap!FN12d_wGDi4@1Ly7QY93;YswKLfRuDSTq$cx<5b2SK6drb6eyfnxDs<=$CScW~SpiIFdkTc%sB z6TfCnalB4+YntQzs6mY7r=d}r;9G94$^Be)y4dhBC01{cF|WPtBPey*ZgwyCt&1SU zARp?W^-<~d$Ejlx9Ky!OZjW@|ekL8{# z*EOAeZRUc+Z7f*Rt?Qihecn4(#xN>_y|bbn;nRGpCSDP=8K_Kb%bZ`w)+*Fb7fp3` z8We3nLM1jIAzphnyP6?^FurB=Cv}e0 z3{r8wJ+0g{E@I?=5&AYo>GkmP){5H4|Cyy+6Lj;N;v59rtzJfR}qk_~Nd zPtD5pBMYJTc?-O)D{ppt7SyE|w|Y+!A@z&e{PtjaUhOIR_ow<8UePpXPHkORxs(IW zjwVInnaPq@$oFc}U%LBZBgbS9wxc`khw=N=)PIVGBE6qbSl@vBv_f-5`Ec`ESqAa8 zcC8U(O$qLOMu#45UaeczWMi>7n|xb6jvt^$eEt$CKx`u%HkpAfE~iH33U?@on8^(p z5$fvtuX?&Ix0m1@@Y!!uLjgr)hCQYQbj~)6D3!<`ZM$nT9b-J+kCsU#wa;(YC7vHn z*87gh*WxwPNdC=FHvVw&ZF;D(`vftsfqoeJdU|Q;=T;CfKZ$ zig@-Xs^1n>J$8P*VHF(OAiW|$Ke)|@e}~w_>2}o-n-vRFJl~<;_}PgKbbjwVn_NeV zg6vaEB1GhAAbej|$m@8qKGTHoUkJz)KmbzgfOK0R&?ZTTeGYKWd2h)A#0|YC<}*4# zGzc`mT1^Jg`TK;%1=`%TMw%_OIphu&tS{Sy71dD_x5hUR8i@YCsVIkP(HQ2G=} z1MKe_x#xqV;*xd~*)zpf-o4{%U-~{pj5+an??u4clDNp^SU~k?W^wf=AA5te+}*kL z_-60==5d=&`X}!vL34Nr*LCaqcUJq}3dVc^=Q6pqde&X3548)l-yD49d6<_Qp{6G2 zzaAN3BhOHX|B$MJ^8?auTOdKLEKvRsKFor@kqmh_dAXd}uc7xXXT)O48AlEK85(S~LgoUD83oesg>e(`#>_8v~A@dk@s5Ne#uLH4u};S$ocd_O0F zd(&c_kZzV63*(ch{7*pnC>J?boXdlVk$HdTVoOG+mvHVJ~^j7t1X zKjY^%3D|dRo#JO?rP0>c>TjQNfr0lEH 
zroi@vpf(c2`6aasOg8m6sZY%MzVeZv2JHBTO}rWB4p2jn&w;p8bq%)%-hh9A*Rf`{Zv*e!{~-d%;saCq)zx9`Af?ssD>^=C_KdcS2g3aSk`t zSCdUiAfc&`9<~|0eJsvT2jqjZFrbpCObPKZgk=3^@0D3Ti)oqp)Ma=k9Gt1?t^iT# zU}!yQt?RxksRwzR5o5bHa}=Wcu+~7Vgp!Za{EmkcdjwqxUc&Kx7g?M}_XgP7q86Js zsR^ijo#}(k%=SHLGiC#f#`kbuJjKQO5>+g!TA5i64|mfwgU-!OBZGCm+4=t$dQF?p z*g<*CA)DjtwSOiS4=;zcbSV%?!X)4L*>;Nt@+~hd2b?)7@))sSj6D4Qw&j0_kquSMy#X73lJ1<6~(VF^}-OKhAf@epi z(Z)fbZiRe$W+d}|M|b&oI7HHVVW|`a6GSr0d>V6L0TN2m(c#}I=R$@8b&tpV`Eobw zlGJHF@uOW6IbVsTU39MsXMMRRo{pyF@(yY+$h1eg2_?f#{93%T{oo=t4))!Pvjpeb zoN|)Ccf*rj-ykde%g}qi#UYBQX`jB0pIy$OvVDaIqWHwBw0LPqN{Xs_E=GSY-qtur znRes~f)Q(T<3U0zygaWFo;RD4kkE?7<(?1ky7s+%{{QQ~lsM9H{<6>f$zx6vw5SrkN z@9h{XV_E*&?eSG<_`_M~JTfvP;m_|+2wT7NyVU#%_;|s7TZLnV>4|6*28;CvnkI)P zfP`$S^}6;g!*KGGk4C!qdouC-tJe!dKqzHzC5^SN^-ka}@-+%_93wbZv5i)f=U&Zy z{2}_Mig_}2D8ZBW-C8Ivh5CDcArdcWX^gbB&_J6t?A$opb!l!bH}6}GnMIt(tEl$~ zFaVSID*;W^UWh}l#YL~uDQe=B0dup?2eBHghHK;*8VkcPzmn`@krDcVp^djR;hICk zLnd#7*jL5sv{FkRS2`Go&iLG_EF2~@*PBG)7G7t#H*6G=VRkEduEE7=c=vo8mWnLO z`LnS$`M&deBRnUNc(Jfc;%^<+_Le*Mv+r$_`;+`vJtk8-cxcyHj|+W@NY@QdPcHbw z^BJF3WRhx~8kVlVE|#Uy@TI-=D@x@3=A1Sik#=zEOIak6n-mP$NsCBA`L-BK3itJA z#>Y(7|HIWeM@Rl_YrkXLn#{zujfrjBwmq376Wg{iu{oJI9ox3ew}0oJbM9U5TD?~P z(fw`is$I3~+0Vx!K{}`QbH5B;j<*AEb}y(v{Ft`gBv&l#m180Tkqa#gAzEFuotsE? zD9HqDzCbEPzm{BGK|%NJJy3THZeVEAgW=bj_dcG4`Y;{s2#{_E4lk*$lZ#bC@!?~@ z2Yu9X=>z6=0t(34tn->@v}wESK9+*R*W+RjYRQS-t$QYzhrCVwB9|y4ZRLi?QBFF0 zSq@S*6Xy6=u(IS9a)Cm|e1v?J^g5Rxj7^Vr8`_dWUB~sX>nFkyi&hvqk;ls<)jF6;bP9nvFp+q9Ads3-!MB%K0l2?##Wjp)z&v zEK!TsB8AWQXZ;my9iutBr8K-%ByNU8K4t5gG6IDsD9+eRmx;XxM=dt1`_ZIB1cpE0 zic;-+%{OiEi8kCOKufVpQ5XkvDWUM$!b*;HWv{yZR5JQd=N79<%I;F?B5JHi@}U$I zL&_II!DLCi2kkmd5RF*pv*rW3G>YzNMUEKN<)b!u_1U@EFH5gv66bQYT@a;S>iYKX zS%v)uBDf}(QRT>RpULRNnEa96hQ_W^K8GowS$N|;kif!|#EqV`lH+`Z))VH+)V)m# zI7uJA&&1wAPi*#XpXhuIvf1q3VZx%rk##qO$#L;XMiC;Jj64{nVfu;psNN>1VtO2l z%h^1(={?hHz448bmWTPt*U>sez5`7!%YV^aPS97d-FoUIOVLg75mcq1mt$Apzp8P> z<#0M#OS{s!$R{dYMa+0&@)VUf8*`6Qf#3b1utqb-;)2U+*i}yzSFr{A<{LMTLdz??IMJ>{$32fQ9Z1*`HeCb$g&vBKy3e zkJ_zMBAf2KHyppWw+Br$IK{a*;+HJnLRf!ZP}xMHhWmIT2CYzB+}1Ym(~hG3l+w;W zj@)5Ej{f6%ReQxeLwWkR#dP_NIh&1J)qiP!bCIZn_*Tz4*J`+&(+CIqrl*-S+DBRT zyas|dS>P`GrDifx=&3d@&2iTjd}`^$0$^*j#Q#`V>*xQ^4D%J$IuEZRG!(z!tj-s0 ziMXnFj_o&_O7c4?&7@S+Sh=Mngy5IK3rdbw2*B;bK2po|B=pyvMvDNxE13~H?%}1A z=V02Q9j%1s^+Vi~`eQmc^3Q&Yu_<(YBeQ)U0kN6@q$VQst#=jz059|2AK}{x+MyWY z_EXJk!^7!{@C%79{kbvnaNS#ZB5v@WF^Lwn2qWS%FwJBU{_p|7q5oRiddFjS3biM( z1ZLX@ur1&*aR_1t*ZAHYLfrBfvEI`<`wtjN{)ABmdP3VJSa8dX>S-4(hut4xJqlg# zCBMKOKfO!F#7es{GNEmU%W57TPPBkBW`LbpHu|&BwAE8RuM|ewozSN7J)ZY`^NT12)?$9|>6!3Dm+SCctMbZdA<{VpcW~yq5FSsFFJT zDRGdeSa@uiH`1Yx#h(49A{cFa$zAFGlnidxJ)NZj^TJV7+0#c&DoW^on52V%cxs_q3IDq1LnFOSy6R-IOZ-011LJf>)=XuMn$OXe>&;2-+Qb?VY6WE;N42k(|<-(qz83P*G|N0mZ(SdaFCJ8kqehC2d98?pV&KfZv}obn@HaPXfQmZtQ8=g+ zT_bUC?Ba^qB5iK!R?lN!)P}K}bXxzW>4Zky8LybB$$?Bbb zrE&`Dnck$b;mF5K?%zY^sHlk$!1oji3h4sKPn`O{xczsBL2c19I3(E|>UMZqkGT}8 zb!O+I*E{+{O85s8ZZ=8d&r;t8>oux=RDlZ=S1bqWjp0&F!agE--!YNA&T`vtUN$an zpw!d6OFsIf(ap1*E@1;gZVIgLSrf<1XMwvreRB)*3FHmcTL?6k>WFyJ*?gc!wKUg3tfyY*6A)@)kjWAz<%As5L=rawbdNKLCqU6xXAd zEo<>dA)y`2rGZ=Y!ziOTx0SPguZuxJNeRJKNdo=9KSNU{4?oZB9cClqU(dRGY|-Kw zkv*>p&~i;bcvQs2PZ}2MA*f&rzEDPHvN_l>fPcL(G%g^E@Is|gp;STr{w$85DPGO1 z9#fE~`h$XgC2i5WY;VlLR!U{mj;<*V?fuSbKhJr`fG^ghtkK3wb$3`0lqqY`0lmA| zmU(rZvUF&eQdGZ2pZ4QYHL7#QYDxPOD=CsLGiAdgoFwdgt%*=M?aZqbET9BZ<+vjb z1pVR^-@lRV<2SM&cG^78>qrF~zZ4S9jJ9)Eo=wcIswup6{_;*%GHJUIOTLQ!C)@pR zT>m?~KN(0RV*V$jK~`gJp#1i-s3CRx2%o#u{a7`Dop;noAc68HS!2Ti73z{3^Qg7! 
z@hZ7e2yWNcHy0Be-F|FRMT6eyq8yknQ|LhGwCYXY4tUCOYqTDrT=;E?k` zp?~uY{`Xr#f^WkJk4|&UXzY~sFr@f4B10)z2{QopEDlb!-7V+k@$X40-)uTQ42r?A zFrk>q0yPl>(Ideq!k_?bGHv&3`7y!T1b9vrraHHT}WlqS`>KA&f^^o#KNPrTRSW) zSWj*dp_-d_TmSo0pzw^UPNJBNtk%OTuTH#FTs{e~3>_(wcJzsTW(%IV*Yx*EHEWTj z{-5h-w9nzLgte@Mip8k4MsPU949kXVX~Xz}zbqoRS z%DgJ6LNqK7tf?P}E3w#vH_9s%p3bAKjevc2uz6%l(`7gUcA6WP<@cjmSae~H%!Oi( z&el5%3~hDzW>b?E($&J2HU4E}z!6pxO9k(pQB(>FOx0qg{(~3NhTdF}LepFLHG&Hx zZt3@B9qZ?5W=1?}Vm#*6(k89JR9-Uy@`RFIfjB86Z;$p@x}AM9DZ|1FX^rR`!3@P* zU2=dB2?0?&B*|}0#JWNBK()e7Y}0*>F<^T7^Rsf}v?}z?sXk1R_6ZFg4{f9dQ89UX z=Sfb{@@Pdy~u5B>}yrhTMkmbwR#c&`gyn#Rh?1fnOBipWdp8cnw(%;i$lUx2TyQ zBpsqy%LXbFd)W->75Qlk!AONlwhker)h->Nn!y1HLD+ymgg}fs7)Nl|AeOJFxNEp6 zaQ_X`e^ynnKWn8yh3NacyY6P3pO=LE{9T~3w71uKjfGzcmw@3pX3*2l!^a7m*__R4 z;g=@+@rI-%Kt{prm#XYK);e53?L-l|iVyzP?iWJsfET#8&;Muvj%rIo61OAP@Na8@XufL;&^Xiy|x^5yXbE5wUj zPRF{O*4!+Gqmf=jj|r@I>+l$%q@u+8Ip>u>atH%qxgEJDy57H~AgefB34_|QYe7%- zqQ|P23Nu&m`gAvi=ryhJVyR+1PC#9hK3bzayF-V2RMmZH`Cg%6LroYy;Uii_fyd=d zdaI*C_?zH4K?>Rp+mPKx+epgc;lBTs7)_6ewy+N4`h{u(b)xm$MRB5U6%n+iw(F5G zf3r-kQJ0Qs?Biv*IY(_b&)g$y#PTQh!=>n z1wjYq4{Z)`?-dZjg9V2NBQT^80%HXO6b3a3K{;X=f{B8`mrc4jAnF2K1BZlorVx)1 zbivyFn_u00)Hv+{ICap30EMqm0L3f&Y_VRwEx;58E~GAmI9r0*|11Lj`;TnX8kv~f zHtBtRLj^e#&KUrZ0)-FZ&3F#rziCLNzsei6N%rqEDaPp~r-dJs3ND>LypCLTM!axd z8-$5?rN@qugs*mQ%5SwCU!X;o2bJ{AogQ=j(bbVt6OKtp!Zs)4L+N~_>mXzzr!gyS9bYeZ*IFYxzjYSbXN_^hYyd27Ehhw zdV_^dA%(N?$7<2k>!avQK;HI$^8`KuN2ZL+o=)Rpjei()qqdGsDtT9w^t(`s1vCU8 zQO-Tt!M8|PG;_n-^UUdL5mw7z<#&NG<0P-!pLT4)>WiwUy%Lz()Z0#{huBysGB%-P zYPl#B&~16&Nyn#w{% zHraf-6knmTVcCi8aB3(irJ2OH+LcFQaVZ%cxVZQB6l=V4R;9F5T5vLs_YUgPjkm%5 zrOHu>8oZRO#~8C~gAlJS_}hLZvC5IxN%`~LrH7>k*bEVM?;uR;;TXi&7fJ!MVjRU3 zaeB}e^xZ$Qps^ENXjL&xvFfic(q;(m5g?pd5HX?TIFodY$+V%BIBUU5U} z-r7-}Gx##FFM<3_;J64$;Dr^)x&X%@qFz`-Do4z?S~GXbJd`ZRGk`@eb}Ni47)8MU zj3k?&WFdUO7E~R&CY)_R%cP7UQymg6Vkh(q_&LD{%fBYn|Bj@ft7OZ~ZULM&*q^{+ z;dpV@Hd`n&Y;@4v&!76Ld1JFWvC$1gz^FW(9_K#%GF#fuyYDiV&5pie0P^ye-1nBg zg+hG`Xun)@y6R2mRbZjLJWDDP=sxpzoPF?vaj+X zPPrk|!v4?qoiZF{Dcm(S82vCdDNMYD3M^yVSF@F_LCwR1hj(Vzw!lXeqt_#sgfb(0 zci9yUzSob?D$ZXYpSMm3pGm5}uRzmVQsU%*d2;A_Q=Rg@jXw0@YR zkUKj|JDgTt@9W~RTztzb)Vjf^$ESL$+WkX~HMR`1XeY2`yAk)+Rc|r@eZNrcx>TnD zJcwFbG^zyzjzC#sZi#TON+H|XxHV-JBsj})rjV2E%`K^pJfQrlW;zjU>ad;|me$g4 zZyzm#Sxf!c7P~nZFmH~pJZ27|&<3QrY7dV=YS(I9J|vIw<&0c?P`DbYUf}@g-*=>{ zR4ZkOVYHawlg>I?Di@D^DGXdkho{b`M}8pz*mMR~go)qoIBU58nvCe^kY^lSyf$^F zIm0cdo9g;m4-{4PZqF3#iQzl@76)Ke(EOYv43(&&V*173+RY*khQ9Ch=T$ek?Z;^} zo=uIYjz&g&z9pT27UB2(<1-ar^FL2EwR1;&ZrLz$wPBX<;KI{YCxjG77rNvshT_o8 zd;Ha)XID}t?$e?%u74{e4#HPgL#xIJhqQ&X$Z%fi7#C7kD(LgK2Nbgh>)yPC zbrR&TAo<~ag~|Z~{0t-kGJFHyqwoUPhz=k&rIiDbT?>=eZgu)u4aL^XIk5>9=x6u04`Etp5iM zO^+mr#op9}dKcH5uvv+6=x25$%DDNY-kJ5woz{2e>Y4Ar@uEVt?R^0Y;z(+B{kaJn zKi%xGjr)g0zc4GH&tE;5ahNV5uPTgE5oo^CXEzh8biA`^ zjQw>=OFw!}8QW%~`pWlJz18^W6M1@(*j!oM9D;B{vyGizSsQt^CKg)&^i_D$VeDLn zg!Q@X{@iYQwr@xip~~?@g3?l zDx8G2CqAFYtIjbg`+*->2o356R8R>uxv;pcR!1v|SvK9jl?&|${x+hAWZ(PVJ-3+@ z(~%}3o8!y=N=8Zo6I-9m?q~+k(@A*|ChFyJ#}*^L$o@9+=b;eH zW#ln4O*&oy3XI<)P^r{4Un~kIxZi9g%P2=)Gpbg`7<*%(dS%Kn}$ z+yzQ#<@t46H2bV&+{hqGHW{>Wo*vXp^4yp0#LdlY?zvDa5WxN}LOQfbS?)=mY@N;> z#f%xE*NxK;rTECxi(0ZNfU)|%xRDGoJ+hK77g8quEH8YwBdb{8w>b?%QK2itFb^t} zmr+#gE3B{I!-aJbKPjXyd!&ykgc>pPsa5_y$Fhc!Uyroj?FGjPR8{=;ZmS|lbNRRH ztUe#J3_D4tvZ4&!_iMYl*|J5PlbV%!HHm;Ctm^3%J3wsfc@tmb-S+YH;pBFt>Rvk~ zpv#K%|5m{+%K-f*qg=c^vgdZ)9cz$&c< zSvk39-;Zz%%E>J5+Z-s6x9t1tBP|V$`^jAS%lEmtIl$HP8^~FsP$Kr_`FUR8=b*7( zrzbNvx7YDZ@%c)VPLs772*+@9b2FJM81P`*_2pq=a#CJWl8T-ls(0Jbd@>7UYM-8) z3pD;+qu+&(fzjamd=t<>00-HR5>iuhvFILjB|4QSxw|(sG)PTj-NdE{C6G}km4=0u 
zGa8_G74qoz{y;tNrpr@exBZ?t9bMt?k3vJuoP1JUN^3?&F8=3{$Z8=eJ&DHnq>-aY z#n&0k^0YFN1-xNnLQyoh+{Em1DnFzfA1A@NZh6O;0#K9F?X4s(?WBjS5{HQm(i|z7 zg6rDZA#twHjnA%cFQ38bynTog9AIbEcGq2&7Gxh( z`hAG%FG9(cFP+2c%xl317Pwr}pKm|xoSI%GC~Lp3{m|Qg1m?6bo~v+9_jtce#j)xi zc$7Uh$PHjaOA$PaMwh4}{vHX;&Y{im z<&RNcSeL$UKbK0{^Bm`VzD%hAvh)oinjk%cVc})+shV8mGf?AySHU;} z3ZqF7y|CBI0v(Qfx~B~C5bYzgKhDnPehFQ!h8Xy;0fg=>A1CYkTqx)lOs>#_kuNrV zSfUF6Y*PHq=8*>#$0lLp=VDkxJ>4xM>?U*NgA-;F%!+R-K+3k<=&swq_#0O#|NhKK z8RTr(nh%j;{3f5{nJm0S$~(gS8HF5%5h;aVS9hLQ?K%3)z9|fJPU+ihjh99Gqg!UB zeBHZfB$05*RgsyxN=G*;mr|84L8(L@hfgK+8p`;>aavV8hA$6T>hs_?L8psIkb!$9 zqBvQx#g-cldaNGmRhe@hA8!JkWIgZpPAVMHZ#xioVx>`Mzy4qxs@M_2d{zxoPsDJ$ zysUk^os_6NWN~S7xX!_5Zu7UQI?0dcmhwN6saabO?uUEmXCQH0@C}NHeijcQJFCD(% zRzPER7EW}6nF$QiY*2?B3^?ZJv(oDr;~z{~(zi5(zYND_@3U9-sF;7g>pq{aydMG8 z1b0;`zjt}xPDis2E)II?^JbkoVcfHW%yeCrt~K=CH)%sv>Mg~d|C?W&&H$l>V&B;t>)&)stTOL35M4fMfvZ{UGYLHtBrk;A z3vIoB_gBpjvJUDAiwdkhXyur128<&Q0zd@;A7m0>!w280dTKy@g;{6y3sf&~Lr;UQ zg5pq5{tJK>I#)Pk1(l-M>(2GYZ~&Bs7*{nzfuy}_zy{g>tW8j14dBGY2F)ij z-WyCJLA?zs=a=9|0pB3*(wI%K>9y~Urb*e^oAkT9p&%j0GdXM9RsV5e7%Hu*I=kK- zT3A?UZ*K?5-aOx)oZLc;214!DTH^<{VIhC4fLkJmxjf&Pxw+j3LBN0?ZFKSG0&4}V z*IJ!U7pe)^EETP-trZnfQJt zar)G=-qQr9)$P0)EZ(Un8*H`RStqd)E(lKRvM#AvVv_oRp|^6G{09@Cn~tDx&tsTY zckr)SLHnlwsEDB*qst`XYO**nx8nZYTr?GBPvG_RArUhO*V){};(`8-&yf*_XsEh* zlFw_8sPAr*K+Qd^S%}g!?9fJP$f?q<9!Q8h{M9-A6ZRf)3-~j%K>YR~V>BYd`9@Z9 za6cqOYvj0%ya^+C7E$cJ34`~7W^=V3^Ak8Rzrxc+f&U7wLaSX1h0GVfdI zXTxLo2jk`|wgunx?sQo_5v%#T(kOZaO?PuRhc^ z**&^RH}}rp;ofbLzCR~k|9=LV;FpOEcAYNorjQV*umLy(1f33dJ9qbH5TG42^gyc- zd^|i5CgTi*oB+{qnAq8oKu>A5#fu$!-3ppFuhngHetkHvuV(>(pg8lhJ`x>qxw(QS ze-X86_Txz<>^C~wIHvsI@&DtoJzaY=pS@%4!6$-Y=DMe`Gt;`&$^aQ@o?uWd&zkz|t{e zE>3KVb!8TrLqXOZmlQl8eHqFY#3!a{YA(^zFi;U--IIzC0#~GBteu&Kb8&vMhMq_| zu6iji|H;kGms~>6UX0gfIzloh#EPjpkCon;0InY+*?&-BZPiq#Y#i1#N>^QxSG8Eu zl}`d0vb?;HUWbK=g|&qF-QcNaVm0iLk98SfS=M|aSi;IVdZ{j%f(a;#N=pDXek><7 z$hJpkPyaprXdw;!VAvjn`UxJgCfviZV&1eVh&hV!$b+~FbB!aZJl1%$B#ZC%aJ8V5 zUNySV4YtP#;h>T#G^Q-8JNCB;)#4KH^tN z;?=|eSViLt=)AK!Q@TMUfk5IN@#2Ub?3Je;K%F36wXvf~Uvz7(H_EWWF*rICvnLR)Z?yWTUGC)P@NP=MyeefZQx1bfH#*ImmNLT3TA8 zR^P97eHgUIF{h`e2Lr9oRdGPJck)?W%oG#{59h0wM8#!gTmc`x?Ck71I;%%VM}Pig z7}S9-H8(e}&ss44{RG1M%$6z$dR`fVSWyH0{TCqJ$D>P2Lqi8B*b^xg2nh+*YxTQa zk7rIuzQm@crCC~8d4bIKva$$4d?PA4y785j7n@jIQ0;6EWqTg(1;(ellUL(2cl|Ww z`{l`Q(r7!-|Xi3jUWo|)ICM_D^(m=C?9vqFf~wcPA| z?u^$az2{7u^Hd@iq5@>?mAT`^&2qohmi0)+ye4^%%2db^WGf18*3WQpDG zT-$u4Lh<*rhvGS)H2q3*cy!bljOf}z=RuHxNNHdtf$tsQM~-=nsIX+i!G5W(Z)4-*YJ1@0;{()z(gbQlzPhpmb+WF*j5@lz z>(`sY1+Id?fSsP#!PSpv%P!+-%>MrV^T)YnAYc(bv9uC3h7d%YBpK*>bg7AVO;S=) z=-~Jy)#uKK5Ck01x%x6FE0n8xf4q4G2{BntePv~3{RdbT6A=L(mz1=3ax!Yj1hO4G zJ3F)H#Kgcz%*wL0vO3<~#lXR#^kHRYp5EBV88i+bG$j@X54z(2Cf5@{K4KiT&Rbw= z?lNT1lG1De<~mu7)bTSicYks|QPT88m*?^?5aOU!BI(54!2e_8({|Y{d93<2ScgHgw@c5U%QXYuk(zM^}p3T z{Px1a`XQYu?R;`@DwhuH1e}foSv6zLc3bqCZrkSp7JCGd%6db|9RW5N%?D%UxnRH2 zd59!F*+im)Z#wr<^$*{a$0{Spm=#2PWE)d`{2AJAh|I0l?p<15@0y5sZ30PDl(m!4 zymw~`Qy)K+FE<_5*7R=zHFfpX({jkbg{H;wQyC+@AJ3Tm7ht)QQn8C6H(FJDKhr4Z z@9l=Ys_m8dd^+ss z!_?QMvbkjO`FT6YVDNjjGo9YcHdARE0)6fuE+P=P1KMB0$|f$ zY|}tTTN(9!d!Pw0OnDyWs3`en(Z%i>95j?ro@rtUpkmW=-;U)Lv(MJp+*3>8c(=Kg zlDaI9A-Z3}au~3G@7{B@lxx7kZ#FS!a6dCIDgBOP_7f_5{lK5G1WcjaadnA7o43n= zfFGLicsuC4gMa2va+lF-;#eREZ-LY}w|`s^RreLhU+N&CUJDSx(i!rS`Ri^|So%Hn z%Gssho&T{mHT#CLe68AND$|bjgC*^hr7SU=MDd2DLrT}u~e)5rRTLAg>Yk5RVj_=Fu)DypdWs(oj&SDm_;FfHa)#2DWif%{41zk zLTi+zKBL{*+aR+?o93>#mVbS{Y!BJJ4$Uxhm}kM=Cmnt6 z)ZJ~@Z6t|^Zk|sApHsC%!$W>JK_&L`VI!7sTB7_YgjtFRk zYzP41*1OWxIyy|Rcl~*lRVzhL^l=3@F4_@bC~$=rE`AJrAY9qM4J#g<)mu-JWgbXJ 
ze{zd_eUqGuyR7r>>>3$jp!t>X!FPPW70EYLaKL6RBG?p5Jr1eh&_PVT8a0>N-^{>U z;-U63EW})YNx1TCvC?u!Tf))*^AL~YU1FT5W;{Pqx!;%5!3BlJyEcZ4k%j8gh8I51 zU*%vX)-`;aw&vR=$4-(4_d;+xGC>|0if~`$mCLCEWGTD0;9e;|lJx6J=~I`A16ZTr z73&tfE7+2dfFpXEzM3~S6PP)eXb}1qz9E>Z0}&x~6ch(oija>G>>ilDr(nY)%NCU( zlq(G6J%-TYITrB}A{~@BV6Yde7rf4Mv7g+Ki3u?coGEb6!)&xPu&>uK5Ah0GoGs`7 zh!LKy>+0*ZTOIR%|Ni~s$B)iVzDTv(Og46QEI=+0CN3pK{XJvEbipqV(ZFCcDe0e_ z0+M#6XJmkgddf6@hxqx$#l@MKI@~InBd^xh=|b`B3@lxT@q4KVgX+& zT4iNra7Arxcp){l`Po@kLBW9S{tytMeYMF(Lrg3Lw5+T%8UVnCh@=B}czFo|RcR(h zN9*hAwj8^${*Vj^a_jI-+lJ#Hn1Gc`h zf3i{~Q9$eRqhEm`AR0Z$T*@$Z*(^gA@I8pnR}4G~Hr?;u!$IP)4H~;FAx4na+ApbQ zhp^~}Ljo*I3KGB}(?!&FL)}`<&>f9x$<~ClIPA=d*MD{6?xo^L>si5`gse2xW=&3y-i-x}u@MKR_wpsXn z$W+>#6I%gQc;=3GSUF z{vv6)>FTk0st7spM-xOZ3$W6jO+Fhw$Hb6A*XXC;`1XZp0?3_-e4-l{y|*uk(LbBN zZ}gfU#qCo@;TCFa%7T3@ZamXPp{r*=>~gxY4wY0|{Zo}`=i{et=nhdt!ZvF;*X|az z7irN#yyK$deqNXl61kP{G>^rc<;?O70Z-TVWy@0r1Ep@=ZHy;PYg>|ux6nmPy}D{T z95OmJ)+#RFnizPwp=Pn6uLak0e;h#kePEGCy6H|%N(Ztu;&QqFbvK#pclr{(qYzX7 zlcoOserM&&Od78RknAd*9tP1vpi|9h<%eHiu~1vW08qz0Ryq2N{LbJ)yER^!zl55&z5eA_r{eBD6y#MJ?_>7oo+w9(~u+YJ_ZUh<=v9LJ!T z^I@djWNZ1hm3rgNd0L{;&gL^eb#Wy&Mp|K-$Xtd23OCai{`-fn_1NpH*uiZ2#TeRyF3NvhYyL?dqSWsPDA{Ze&QslNR(^|d zDDJ}+H4tm@7q2*4DWMy!CyA;Xe>xYZ%m|QLWw6g>P(`APCZ%EX0_qIFslZ`!o&+Ie|aJYOjPEKrIFG>6U!iU|CYo*{2Z z2CAj$j^X);(|Qy0c(TeUH>^Z19~5#Bx-m1xw7R{SLz^wxoPlE4^*GDLwx$7STI&a6sghbO( z5T8>kj!%xX1!6Gd05mKIIlJqf!SL{K(B{8d zPn8#V?X$Aqj*=7)azIZ=05`na>H~3~Pfkv}yu9X*6G26;(~FCoq$D<%gK^Lkpr6Ng zuZ>MiKvlK!ni~Cgb1f}AfS8yVASES5E`!a<#RUNYArgdu2C*mdgoAbU^eU4VK&I+| zyZd`ErgT{dh+xpGL_|caR8O%*do(C6E@*<9Z~8S#`yGT&i!&B;svAtD^zBY&julf0 z!X;kDrnMwsv^O%*JW`ORt`AFB3ES~hhVA&xSK9^>WtX=EH`x7-r-z)li*M|PE_r$a zsyY{cC}jq3NW!EMs^GT@zQr2Bv)m>eO>l@dxOZnfTi@IjI_!jy>gXRIo%s0$9ZRT@ z%~VxhX5f$ZG`Jry1_&rj;#9b0xLv)a1hvm6AB%h@zaNq@-aNZFBb=OPBQ>NF`DIPa zU0&3GE?N6bXnWiqpFKw1O)OiUr!~+%7&;(ldzlK2?Mn|S*T`CmB7ZfV_oVNlXJr0|-*7ltLEHKakcIxy<54_3tH!atF#VWNJta~==Q95`x zMDT)*X zZtUH&6lzx3wVXXIV>MUI98IiS;K&|fJ8g~M5|;elbr?bc9_LYcKe=?+RS0r+^_7Zj_Sb~>5d%?!o+JxUVOKE8ZH1WX)0_Au~2m;rLc zyaNCf?s}Q<1y2SHaA+!o0S@P_J^(-{7yU^;qf)>sAVCk`-|e}T5$9x+>uknxbIhRB ziXxFZMX1_u6Re&3(H>{99fURZB`T`;i(P$ZDpS}u=uR)$v0`IcKUYALciJGAaj;7$_p;u`wgxYd+|Dwo z!|JHyl|?M$l609{Sv$r>m*;o?caJGVFwRe8>$X06~deR zL}yMid6F82LC1hc4~N^8BZ#4Cg*n=%d7M|W=*dqJq z_;m>fKyK8SwRMvyR&z*`%8piy8Hmswo~dB({Ke+5iBZ%2YjC``4{$UyUU_S9QcwK+ zBW?F=iKmQ4#g?c{oY>h-KU?iS@y=K1Zx9CU3(DD+6V+W};dq0S)>a;FTfovuk1}A2 z>vE{W-F>fM+ZsSSwZO$+0*SgbRPoMcD z6&YR5pJ>L>xEO=|cE7az2 z%xp=5fwm$7(7}_*UgVqp$LN@)0v#AMPj!F3ca>i~ooQ*Xdm-p!0x}LAwQ2Ia=9+301{=gL*>gD8lF@ZPpmEMtW_cUJZR+A7Z zS-lED2Ky``9;)_}8(;|+%5`$_dp!q=8D#at+sWm&Hd`8a!td%lR+6Xk%>c;07ff`+ zeF{+UTRXthBS%qZSoKt+xD&*p?yUEUaCs!n1pkRS;p1jHU_cbZ)6xF(&3DyvIYFLW zAoKAqvTYGzVr?ovbn!ia{ zSZ+Xmk)VHr5dV9nirPfAL9dAwvq5APossEkxrQL)OfpDut3EGi-k z|IT7IipkC01}a!Xc!GS>Hmj{{Y(NPpc5J_*vU0QC?X+tKbZ!Hvsi~o+rUspP(o<7g zO&5rno0mv_e|Q5aD^TPF5z@4_}d8>PByD|Mh1o-y4q>CscFC+ z$o1P_4~o8AZWzCgWcrC`MdtUe(|jLIWIq?k zlQYzs>~Z94*kyb9{**H5X3%VU)a?*4UvSyaD&yUxHp<1^yjpva`0@@^Rd3>Z+4`yV zjg;SB!M-n{Bs-0NSN;`2Y0cp^x;}!Bql`lC+x(s%Debu(UXAeYgpGqil#7xk zrm9u~O%5a6!@&Uy=!eR?G`Z1@>QC4EBBuyaMo7DcQ+k-b!m$nO!de}*9JtEedcnmF z2S0}ef>34%QXb>gm zb;x>~$fz5F7``)%FOIVbVLgbr{I9?6yF%zI&Za{2Whe++XcWooD3vZcOve1cKp<2_r_ z)xB9u2QeFo*~!ub;ijILYECBR(_%JmpN6`i4N4aGy7aTdq1xY6nzwTh3+wXy)>G;W z_3MXWQn%bMMpLH!q`4+FtDv$IH0cauCIE6NANh@&JXB!k59bs!0zZDa=T6x^3-)I( zNvgscY>K&66;Ny0x}sF;MJAJn5w8x$Xl?UTRQ#pK(C4|&j|TK&{VpD=Wa!*>U0OZ1 z!mritwvh&}kO!;)MK(qnhMQV+_b<#DMBKUp-vR|}%qms074|W9;;NQ`krF!fs z=1#Lclr`x%5C90=ANg>%LppAXaV5!fGA*h2u5uJkyGdbUMeq^?mdA=zMSXcL4HYv9 
zGqyE^GsX!|byIW2M{J6S<#CjGNl|vI6;NWBYF4Y{NW6@pTntYy*~)b`JtMPDbqv2EpF*X4 zlXOFy9DJdQGqsE!DTj!dTMP?Qo4h-6hFPD5-)8-iZhFwup#sZLOtbA+D;m{eWj1%q znblTdfpl$wU#_VF;W{XVSX`?VY*uG~$#(x}m%SY=nze@vWjwW|rmfy`J)6-_^!A&f zY?zVv?j1-fH%C%79}0sRyDOJf5)p|(h1TZ5gtu(7PswgI`-XC%A&(ux*$rf+BD7`v{g^8KiN|BRo>QS zDRjTt-}On_L61zsp$__rU= zP0YzrEhwM>t*XgBFL!-rG#UCE&I^01D>;s;yGceXHlrxcT2<@$%Y$@~Qd=Pxi! z>4xtrT*!TN|M#@RbQ7AO#DBV51-Ukx0p9aN4bl?e3kIwF;QtY=3w?b<^749y!=M<} zmk<-XettVC0Qn@gwzq>kfY%onA7AcIS=t=^%3ga@84Y4^nMHJTbiSfKJwMxlJc4PG z;UU2VazO_+86dG_NeOgsU!Smgk5nSXJ(|#%{C6r$3No^@&7S{9*INd))xYi93BigN zcPYi)-AbXw-HN+QaVL23;_k)WT}p8aQrz9$edoUakGy;4-81=;$z;~dnzb(b9oKn^ z6wD^&dkepfrSs^vIs>6|t6gZXvAJ1mv%pRo^+EhHplYh%Ojc4}i`(d>4nhZ|vPYPxfF_NMkF4w{0ja&3+#d<_l0F4&}dxj8*D zl9G9D+eQB3cvC7CgQ&Hl0Lr=Loe+o|0b^O>wpvT^R|Sd&oF~H7bb$TqLu;jbjU>b< zADRx_4E5U=sNABLK#HAvJe5BkfPT%qW;(Xe?oQoJ9d@7i4B21(O5SD8K)E`dTiKnF}nD|h}q&3&_3cxUEJJSbk+mXl>|P-?|=ZI>ep zm3tkKCZvPs$|dZ{VLS_yZv_MtEO*A(v~_U%c^j>x3FR1wH2_*i&w47}>fBZhm2`CR{mcbAG{lsu-SRjk zs{K2R&zQpd=kwWS@wwQnjNW?fBAISi6;(x*lrU<5(r31K^PN{Oz=(+T+Gg^!<+ZJ? zKdS;wnO>(W3#o47^YVx%YN2wuH_aPS+M`M= z+oL-yiHeY5Mg& zM8LLLAr7ar<$TJ;rIrp56$uFr98kVIY@&LYL?p+7V(!y;H(UB>>ZAgy7Q zNf7>PTbAumGgXPx0;k34@aQwajaau_OG^hG!oj;5tE?F8y}e{K{-XQlL*?kOE7*eE z!W~S2O(7aNP7Sf`n59R@LtE7ti@+kSk2oABR^_`|6}A3%P)(N!fJC7Al*euThOjO1 z@wE(D=gt`TM%L}B2yoz9J_L+nAWbCRrg$A94wDp#~EgUrRBZlQ}KByrR6*Nov zCd{GCl_(kRuU8`9LC&JhXUeE0$xUX%Az%$}#DLXBuX#Hb!jd)rr@@c}v~z$k7P*%c zsW--`lJPjZ>$t*PH~wXeK0V&DHh@&nvQ9Efr!g?8)8c*;az1E>u-poeB?2UExpp=Ppq1eha# ziJwv-;`jfv3v`6u%vs{Ekx{t}p6g$*&?nQ56M8SdY2HM9ifRFb2hi@{Zxqk2MX%!U zlhfVymF*}09)PTcPkjAuZDuEp+@rFlza(a*TpD0*^^MG(>^=iVM2I4PQwke|{SoPh zXq%Qb9-b4#AQ|CNkJ zfcXx4=0WJwe=&OZF;}L@ISRlbvZ3M9lg|I>_Wb)>E-@)7=?+#pm$mxy02F!_{_zfV z9a@MMAOwYmCcl#4;T3fn&_M?y|9lX_N+qO^lme@&>NA)=%@6U0_RtYP8^+3vxd2VN4SJ$EFODOD0c*Jg^@~Qr=m9lJ68%7ET2#@?5nV0UA zw)6}D)T;*hdIvvBUkkNs`}naiHmxQ;jrsVVWqPe%dO(z%Qk4VUyo#8;y2rwI}egT1<7G%Gxxf>WcpxeqaJd_&+J&LhWZBj zmbEHtTjP}Y*aFcKK-#zue)VQ@*QfYC6iFpzL2B%nq$wUQ=GYg9it?z31rJU@NLYzf z%{-XBrqO6`XM1{rrG!RANcVg@8LhEP1wfAVJ5aydQP<_ADpRCtf9*On#CXtyZ+ zg4Xrg6YJin0*S0O-#H63uO$Qqa0>81CVLk@i8P9eiqR*7>Qz5@`GIAr5&E`SONq&w zlYUC|W;ZW|mllCW9cx}^2<6@1?y_Si1T2mJgn;?)2S z$6xHy4iKX+oj;siZ%@Xw3%EE~PY;jv4RndebdDw8t~z%iN#<-Dg_Rp=cL{U)yv66S zBjB0#PLaXxRuXc$HWg_iX%&t{V>vMt(t@kj`cNtdhEi&Hsz0f zl7|7zJwZ-th%IzxGKl$C|B5bfU4eS4zB3F0!m4Ec}6`}AU#Y~W@@^5F= zR@%SD*bRoK`wYyhofX=RmS>xDoz`p2@>u4uf~B2v@?lg)NX5-OD3AGQg+rvuxnemh z!rebR__}uI*4x?Pf0}gAy)c>!xLO$_bpJ{X#o}Sjqt|wlJEq7+;3CRb|J#3u_+JIX zhDcAc&*uHNo8Oa=5eEZzTlDPzPba8P4;6KXK&jH_mlr%{9nB6dj?_{6u9w^6g*t^S z!IBdtOhnjEbfc1)f+jNdWRP&@gQdx0t}2zo5GOpoca_GSh9IMyW#}kubJc!8|F+rL zGCSfOrMcf4{9YoT^G%p|g4$5FEing;qxbq){@68+{j8U36x~6?mfK!5*L}-XlZ;o0 z%{BM0QN}UUrYHmtm3SOdUekI}mk^ z321r0wx2|#pwn6CRku1pMwm1yQkM2(>-Z$jVSQLjqV%r45#H8kaZJ#SkyUB;!ikca zs-|}R7gYiM`XE3tC9Q|Y-t0m6IdyydGS|A+uf@mw7f8X=Ppr&7adr?ghz%kJViTtFns4Z+}a z4!K5BLY&0GRd1NWC#SLo8BX_8UC+Pob+lf#Aipt@zyYjzKiB)Jw8Y1w$F(`QdLRsTf@7y4{yX#b6*#+0r#fFeB| z1|}+eWW{?K6_fdSzw5~(j6QzyvBNE~7t-kFoVWkuazq81)llrz#>2RDun!I2Wrb$@ zk@}LhT`xv2cw6Yq`Hs1YL>47Hwz$D-#KXy?^C zq9G^Aem1N34u-aFg^O%&uTN@Q_XDk`?}mN+d@qC1Q$ys70vROtGqt%mdF6I4P--Lw z;O0D!PITK`k>94RtAsTv6*AJhmpN+RhDHeyN8e2I9a7tj9*Y}q;cci_PtJs{y3?l^ z3ESbJ#n{=UgUi}k0FGLltXlE()kNovkFy0J%$&yKEDA%T;68bmVrFmvH@N<7o6u!9;-)8`J)5wpbsphdPl_{C zivN+|frZH(9yFfm=xr+y(yE;=C28NF>Ak%-I%cD&`SG0GD=C1D1E1braID| zkv6-uN9QKP>NJ zE%)2J8oV3L?QI71n2CocMv4ypWFlg*@15U2hocAC}{dYPjNDyc!Y>!YR3j@kG^l$&d{Xn7ggcxA9W4&gmw$U#$}p3JdNzYDA9TSt+BmZo-> zpQm*0Qe}S8_lvb2d+#N}=^7;p(_dZ-1TgsD7E4^;BZUvpF|eCF-M8R(z7?A#(fuG@ 
z>&h-TnzUDBP+MZR&5%vWAxDaGXH-8>Q{o`wDYJ6&;3Cb-#w1LXUhukhFu{qL%*)ok znj&@lxDk_OJ}KwrTEonzU9-QgUd3-GmyFgT^JV8Jkw=TQun9GuqVPxH4-eA(j1n?;adjmd8>pR~ zC&6Rk&$(r<94_b@wCQ}`^q45_)n-^@DL2#IKZWvczV${hF6o~Ttuo$UI&(vd#oK4? zyG}`wHfdg-2aVHym4cT&zvdt+E0HoWk?&f_AQt;V%T3N$g>>RrSRmDD(o0LyyeuAt zER#B@e#dJ4eSXm1^2$uKw9oU$4$r?3>|XC&d`WmuHLWeKN1;!OF#*;~=R{S|IqOV` z#pFanmZ=lOm03@Ysq6ShxIVz_YS$i}bU8#q=9$_KJ$q^9D zKXZ`#yWG9gnd=iePjw9rq=}p3RrUvqOI}EfB%B#HL6dWDabKnFEI~Ra345_$U|U~F z{@&ef`r4>b?%6C&Xh^yh272H)KabmBm7xdRm1lWl`)xczTRkwb^!AbEh}X5Wt}eFb zfoJRSnlnnJ1Z=onOGI;20DG0vYf=IghS6)@jht><&aDn9f@N8oA3?E<&k)Td!@hYf8NW^*{Yx(DIuN z_8NiO#B!!K5g`!u=KQerua z*$m!jPo3e2AI2d+Ktuj)ZX|KqEC{oKL!ihyYqz*piC}YI)Zv z?HZ2S&nP||3`Ef%DJis7mq-Sd>bwFy%TMcU$o57X=8(MZI>ia4{-Bezl}Ys)!Do8d zLCA!OaHAi#nqLyFAshv*u68TSmuS&hRfgLfSsw-2C70X&jPi-!oEC)9X&E^WPV0aJ zXdho+j)8%pFSSy4X64H@U*^n^Q2#h6V^9$E^;Kugvx%WHHd@b^d)rdblVeFkr$`C!y?d{HY{IFybEan5L z>4>}%w@0udBTX;)@!+-8d3w~P+dT3*o0!7`b?>mA9PS(A-CrsZAWJ0v__-|g33s;R zT=w*@$GX8%M)%ryyK)p?!4owtr|sr>jW%^%)j$O8#u3Mz9-hN5rQaJ`Ag^(Kt~q{I z^_;E!nL6Xl0B_oxq{PwMjg=_vvsR|`c0cPsb^h<^jIac}Xm}vA2W<$E9n|LgtMgu- z^Oc5cXWE)~Jse^20|5OOmL@}uBjkQM3%-#L!PA85VwF{C-J1j4F|WV1S3{;wt``L85`0wFeC4!KOCN%ZxJhnS zxy(+7P?^S8BZdm)h?hK?q_pPDz3M+vqL-j7Tty{b5r7OZMFjS%+WLMXpy^D=PofO?mv=d6N)J*@4!(@ZB$6KXBSm zGd8mwndxN;2NiyxNRL*T;98%01?7+u+T`=QhLF9v3~;&?=gas$tD&P!cwWSZ2|_z%l0Dc5$wccFlTWb2DJ_$& z`tXRW_nkuuemLdZ3;Svo%tQJtHYmy;JKC4BUFaUGexz%T3}z3Irh9(RZicNdBn6#JM4DdgtqQ&#|(&#N2T)Tw4+ zL;!W*3e#ynz;2Sx?5}lC$uN3h zJ_J#cp{^P9Hcndn+H+~+4^fd3iEtGgt<37E&}AxHeXeA- z>(Cdu&hEZwc8LtT72Bw7-3sArq(Kwyd8bAa2)&c(VY>7x&o~BGjrWu577{)18sQ~V z#nb%r01vrZ-mCjWT)?NKxpSb)@Jc0zgTC{Dn&sA5vLG&t5?bNwaogs?8G`L!-+q4t z4qv=}ec+Rl3M=6&2r00F#cS8gUxGXa*jLhR6 z%}i&r937Mod~Q3MfFqEqc|Q;h+v#pU8aeHk#db%;H8Ap1-p|QyAbZ4qFZp3=Px{kq z)2>ED3ziq;+yF!unziahA~E?qX#1M374gFV^z?K&z4eKue6d2r4}v&mmMTRPQc2iq z@;4)P1p(#ELi8Pn(`LRM3GaJ4bHIA|r^dH?N-i(?n2iWQBqLH%ZP&kBG4^G2!x^6F zhMC!IU(HvJ4gXL#;TDMq)H%hR);|a<7nrB@+UU1t-2J&$3N&k$i4%v|FBrF7hf=e@ z0}u)lbwx2err?m3fUhRF#sMh-3}pzaukucnv?forcjCU+ceU|Z0-ZYm#~k)M4ypN< zX~HWBtQSv(BiWsS_U0N--^-krIuca|5l%nZe)qRmlF+Ct(}=5hqd)~%>O@uDl2D6v z<0aoUW4?;F`2(4j?$9&WHw(Ikh)+T&+g;5rJGyQ>Rqs1NF8cw;^BMztHfuzAL$hDS z5}K`tKTPHw2*F3X@Z?+?e~5vsl4592W8%klHd zNyS)SXVvKzM@~g@tV@@Bl=~c#?LCnt?@{rIE%! 
zGdV}RxvA2U&`YwYl&}QSv)WYyTFZXf@l6*h33ECHGxoOMaYA%lgy{@{We5H9oZ#nM zDG1wNe$2s-i*ujuhiRMipHW8PeBozjT<16k0Pb)mFcl-Fwt!t2JkxaFeT|1j&l zS|=z37zH#Ih5ZHN2Vy8y{sYVrQ1o`qM1ut|a1G8>uf&WQjO9kzm%~Z|xdB9j2&bSc z@LbwVd8bVQbYPSpC1Sl5pO~bJjsmg)5PpTbRr{X!-ul_Rf{nCV3MTF^Hg68h4~s*e0u!QjE%Pxm}LZ$L=_br9UX1mHuRA*$u5oOVuxh1N%!ra z3CE+t*`S~9ZJX&nE6f+-CJdA(H znZX1SVJoiO(ju-zM;u>6ZGmzgDHZYvad*R0VS%BUDtnV5miEHZIvxQRv)?oiE7Tus za3_08(KeB(&t~;G8=+%m=SE?^TdEC@Lji5k<4Si3Vy|>W+I4(aYc>TaKKI*=Lf2^E zblZ%S{Pz`Qa9%L+)?efQG@fwvH#Lx?L>pssQJ_NV!w(A8#FF& zef6l9XWW+5-(pBK-tUYr*gNtS-`8^P?hY#$AR1`92yr`4;g&l*`rf+`mP@hci|m`I zdM6`~Ap`4_3?DAk@a`js_?#E7#=DW>>IzB@AtadS(E~*uwZ`uy9VUXx$>3!bL zuGJPb@xbYADyN$7u9=riFc!6jGv$x5O-@A47GpAk-;ZtNL)qewh|DF4UtTr8GGYPb z2%P!Prx7nUJXAg$y);^>*9ll7tbQ$zHfPcLcg!SM`!o11-J6K*u!>9~&D zv&Ck7YTUw=JKj~F>j4kPgtgZjK6R#B5}s))|7q;pdU43aKa&xL z6O~`!A#1bnL^mDNXtG`NVv|!p{wA2*5TiaLbTcCCk^KE7!*)LI?|FpZINv#K6Q^4X z(OzJqbT$m;_x3#!j)NTqc<$IO(wI2A$BV(|m@-YcwPyZDC_!+#_POqv~Oc^ur#1R2zTG$)9|$ zP3>EvslnBs<6)hChegN=t84(o%%@7ebwjyik0qGkTD^qAx`7iP>0t?sq)AN%K$E>i+ z2jA1t)cxWUa~86jio^ph@W;1Xo$uB0LUpp@d|%#lTB$osbgX#PZb+4tL8h!$rW2lj zIy#*iKUQVY~x7zlte2d5f# z9Yn;%;0t#S#|~B$g`WpTr|noxRi+V?^9e8T;DSVe!J-(7fO_~x0Jo?Bs#Rruxu*b@ zG(lCch_&*qv2aP}F_HnZB0vWL& z?wcgH$7lQJmh{bu0b*(5*F`|-_+c)~d(bd^b>*IIog&$EQ^3H6=XH#%>a!FbAg(#i zo-d8vqHgwWX?(PKq)DMOVV~RTfXnRC=B2_~ovI8ez|!eX%6eWaIpB3MJ(ORbBqMTR z5NNQv2sIlWJNNB86_K-XID1^6l<)ZR;C$8EbcTcszL;kd8FQmW@^Wypj-DU=UEsGy zpj~{EABa_ulRIrT(_}0#vdO&M{C=SNY{j~q32WV&*W@KSi-d6R^9t&_FqWJ#VjcLq zjMl(9+xr_0IMo92q;&qMd9KwZ+MSJuGBL{p4;u8dooSSvuCq2pj~tDb+`zRYrIko5 zQc^*pvT7?1M+>(s758-ROlxnDL-0N4q^d(Rbh zI*w$SORoG{rRE3oe^>GonU3^Yq1L~(cbbmgT3`A`%0kilky7M4U%nh`mht%R?qbM| z=l2T6eD69VR1taG88b%EGi4Y}Z@RlYa(w9?v4BI(die`>`D5#Jn#t?_L8d6!zUzJZ zEFrS_>+Ngbd!#G9>Cjc-TI2rrQsQd_A}IRT#-oD|zcIrD_GeZJ7{(o_qQ&uKzYhs= z?w#$)PHYC;-(#M|g=9Z?T!Oggth!rMcVKjO`5zF{rVPou?_qjHj!!$Jx86V#y~t^C!~KaHtI$EP{-?1m`L z%b~Iyw4R>Ff5)()sdbN8oqSCcGNED`e0P+_O`L2gLeFG*rZsW z{SXfh&hq+HC>=5p$-Pn-!$E7#sJG>xNm!RH(sItoC0D*wUolsFfn0uDsdFxgh^;43 z#}_SAM|h*TW$Jadoa=PJ4#q^@O#Eho8mdF#C{*-5J;cLFAAtl#YQ1jkkPoUp*`4;n zD<=;$_z`c8P)ox4x9TO-wB^EZ!1u>AlVwn!^%j?Fkc)3p&M%Eb+tNd!pmVD3 zsm}1l0ffP~<4G$&kR+f6=iu`kP`+EJ8w^%C|H{#Ppwouif& zR98=QcZ)uGBARYEuUxaYx%xV>r89ZpYcMCS96(YA8Z2XbK7A1XXE6|FV@+0Xsn1WY zA*|DfhF_w`deYum;p&$kE-bPPX+6W=3$2T;caiI53acL(%9njY-KITiZnA08`rTCm z18Vm#uD;v9?))5JxokmQLdSoJqTPld$M*GD{}WAB5yx0->p0>IL(OG=GiM_==4J*O zZnT+u!=YHWRq4Nb0D5V_0RUH1B4x~CH`YHk&AU1>D-7^vvmYk+@>LYb_+9f5MP>6m zWFV_!9X9g9qlM(MZFuSMDNPu_=Y)xQjR(`tmF?9|%2p?RsE$C(b^}80dylK+=<|bW zdj-WjX@tWx(+W)@hf-dX^Rgmv#t}diAJ^U0Et0t`aYVK6PiB+dK;i6>z(Qv(UFCVf zu5GBiHEqXtsEs_HHn&)I)($eo@CHKEHE-;!QCztkOG&9r{9k(}fwky2w9ixdmc&H9kku63IYg2?Jpq2guG=V`fiXtRpw7KM4aaC;*(ozlFV&7ALN96x>qCf z0pzSFJT3O1{w;mq=L1nfxX0@ayvwpK=j?^E^#)2ys>9FWnQl*aWX$bJpu0b>j(5*P ze+sB5@7YknZ`Awme==O8z|CdM6UC4wTzm-Mr3po#@}JAuLfA+b3*2KfkwvC|o_L=8 zqO94Sl^$&VmNud41mu*t-qh^E1K`slX4JD)+t^W&!YIZK#*3_a=!pX92DF|k zb`-@Rm2BdOi4QmucaM4CU?*2xM#7A7A8}U61ZD{MMYmnk$G|vT*@Qz4$W|n0P`|0| z_W4M)DxY;`(3fOIU;}%9|X`uo&cb>tMScZHG{T6?BsaONb_`c zw?NKn{2%bWU_f(49J0Yj4776~K{rAcFkjVi01ykt_z64^6BlPSb-6e+oh-J9`eG_g=|zyhA4Cfi z7@;%f|2xEegW;17W&qcTewe^0-mlR$LN$tl7^42CM1zL9^S*fKlJyc)2%d&KOY$F5 ztfz~}O!AsTY{fo=`P=z4%WKwSRrIJ{69dDlIQy8O3jFgyF)zSIr1ZW;k@Ux*HFjk8wYTU?J( zE&eTPT5@LV@%hgY9N4eS!#IOr7=H{598-4MVA>dk1Hyd(*nA}E@ws?JNn$u{;eFL= zxbq?Z95838(N358P0hfjCL})fiv9Zg4lqR~mWp-;!dUIBRhv9i;z+{5U{?T#V^e0& z|7~5u{cCxx#_=MM{=Mw*=wj#nZ}^Z;qj<`XsDfz(POBO;2mr!%d%=#eh);I9GNv^xjNHj% zg~p_73Wi+vE9H)LgLGbGG2F}7GBj1~ULvAWMs`5JV0tspUo4?6AO0x%QxTE#Qy<_+ 
ziRIEAy9fQ=(6nX&5-%Uy7ehm%65DO{cLOax3{Hm<>xl_*rEvkLy?*_8aYK2=dN>J# z4%1k$>-dCPs;ljPOQn{~BJ0x)YcO4FjIZwP_1bes(*!h=Z*Mo2y0 zoy6W^oiM7MUlEfk{qv@%@^|vq8D=2s;0l?6j#0p$<>g9#XeoiMuldnaGd_IG8TYbTpf$h=Ra0caC= z^Wx=VHWE=%_uS^N8qmt87$y<=bT11N=2-85X7kl(D)v)=CN9gpx&B#N;hja?@z zv$XoWDqTLnkB@+lnC}2|+XhZrfYD>w6jq0|^D?gmZVC7h(U6h`piEM7rY5FPMgXTs zX4QiO&Mw;O+LOD-qIP2CY9KrhKm_JKSexUgP5LMD=}yLz>;9fNS6#w z$>}lzBUf9a`d>ttFfz4;jtNmg&Ew4bf1qF8xDVr4 zvvlh9%&F>6aRgEWclc31>s=NdT8 zC(VraUicbi+2c@M@TrdKQ_2$}!<*n*F8v68Vbi9nqHss_DR-Id>)7JK&E%bQp9wVD zn_3sE@Q>P{Rb8H;jX~d`3?9Se%BFPafzh|KfL@KWxI7~=QdFNr%?1Q=ChEv*Cmazi zk7iXBz2JXer4L{|cmOXVz#XZa!E4>K8+RU6R;f@2t`%eqo?1(S_zMHTYNFBM+@7ok zARVBp!X6RrTM9Zp$d4{i!3!qYUu@^PO)!V#r;;6P-#17Q#I9uw09XsJMM=<~CpFJW-enVPBHqu#_X_;C?4YLu_QF78Ni68k=UR( zfWLQ>tdp$6JHtl9kZ=wDZ(<7t;#SMkc_f2E6Uz#x(q@t?;;7!0st#o{FqO3^_$qP} zRU@?fF9E>tq9lN9p%k7P_!=lW_)iXlrnh^~ACZyAP^D1YyMXgE8ur?$(ARhR6s@C5 zzMDJ)DTaA5Yh9-7l5&Ytdk0_-p{g)F41a{-30< z0_EVI-rwp9DXBW$>(vB+bU8K)@R+F+dHf)K^5uJB7FEyt+(bi5Q;FD~CR6mduLWx8 zG15a*fk~MtBlKl-qcRBe$uy<@X6YWb8SL#!{eQY38Gc#m`m`#V{4u%*r5>nx6HaZn z1qo5SOe6Bsyl8bu`giuKd{7#njm}`j;i@;H&Gq?A;+wzfZ|BR@uJX2FC66MS7~k!c zl0fZrbvtI^R}#-_+s^vC!lw^}I)KP)hNZc;%pBUQ2;sZTU#+C7DobyR-0OJ#OMzbU zs|;mtxX73y7(p_nA22j3!|0g^#_)pwwpP5|8Zseg>#uoooZ5=TqLicg>_wIZC~D?Y zXuwioeP&#~pC0PB72vkmUjFrxf=lW{d>bWvsc4MKd9v(sX%dtI5M4kXsuD@GWqwc4Bvw$B2Hw?$P`6G6N zQV@^TO3uGY9Gd5B$3`75a{rt-Hw^;j2bXybvS_xP&-2um? z<%!H-?E5wIrcs?7mUF_CoP`UUZp3E5wOZjvpq(^Z_M?vNVg+CQMbvAzCm1;c<`Sr3 z@R1y$-&(Qmu{KTnvJ1>H(WqlY>QJkb$L*`uVG8me>P9mV5e4P@Z}Ox%l9i68h(_yMm{*2(V1o90RgBKjKq)|{(O>!px3|4R-#>_oMups1D|JEU{QxX<#%^L-ocm42XsxG# zijvJ>uJfd46Fb(Lfja`%N->EJGB|3qzb~zPMx#oxSwE%ouwcZ=!?87C4^;$&8yM}4 zl*~;kd?cQbWw4gvR<;0ey5G(wC46M$%BC>pIXXEQMAU8Fzsv92qTM8T-1mm=JtVq6 zolY*f#pDedUWQa43p{z+clb$sLS8L2Rf^(+o1}>CK{v@yd_tE=ONoP<7vNWN{`^2b zv7p7LYAr(q66I6kcI8HyEXznsjIG_#D)E+H>rp170{+Iqy)Deni>jitpzD5NjZOXe zJ6Wmqvc2i9OzX1PcgUiJ)&kPV6vYv^d&2ity&Vu^=p|QBw1}P>pn6VzI38Fmz+1%O za9pcavC{T@bq7PE^_fD<7Ly~7k85{oAMGl3|@RL!Y=+h$N&Ll5p zGl2m>Q2n6^127yEsewZ~qN;9B?f|ktT!z5}3v?4hGYkNTQ4tr1CQ9|$na$p6Ni~g9 zLs%&m?yJ8+&V3>%?~u6-e-P%7PQ(uC*2I;_wSmA3!=l=|pN$}5K=%_twkn(oNUR|h z29qM{hU(a0*|a;+&*KYVgw8b#3_eT{e+5VuYl_Q_B__@E#=X4{0k)02Fm#pIUzAy~ z2*5QXbHdC5yI_)}cRa;rj%a4dXa9Y=a`~ZxO6(jQWhZ+7#6d9x%YG zW`h0IaskmFFb@ZMthP-p^HgPbNR7vWHb>{!>{oZSI-hj^;Ad>DMVA$6uZ({{;x>D)RgcR?6Z zba0q3fK@-FlWv>qj#N`nIgR~z^T3Qe!sFsIR_;^LpV%M~&nyX^lZC$4&xqEOzbmr( z9S76CsDF(8tvFD9jBe-pI5IGabflNLq}h3gl2%}>o~=CtFPsA{lE?Dy*ZJI9vc#KC zxygvdrGh_E6j=8>83bGLnJj{f@~1rQyd`60rz7w=oxd>>u8y(4UMldc;(BYA!tUZh`D1%part(!^)$6pL0I#CxIq%ki?f~`f52kj(Fpub% zb;8O{!hrRN)+T1aF#wO-ghdWMOCz-lq79lmvdGhC&H`aiK%^RnxY zkZ6^Tk>^6p;=LMb;t4DY?6SX(0fPgs=)y~?W}s%iXT?822yD+lO?CD8)m2@6y-t^J zhp6HuZCy)?hr0SKLTH2KbpONzhA4EE-onBnTi6G>!>OyN*g)g*czu9{kFTk!s`?C7 z6n8$FJ(((ySw#6gHB~DSf$Q(T_!_kC*wW#7!w+?up-F&xGgyEA+>0Qo(zm)Q%*J*x z?Fco@%T0l5zCwwLIae0MH#6ZAnV(P%4|wNh)lhX#ZI8XEJOj&cw8!fengmVhRR=wR z!TGT-2pI%4_qIKw;QF-5yuCEJRZs8Sq2H?<=-v`+q4fcUC`Tx|O%5qV@lYRw|D5qZ zXx%VpOWMtyh>0|G-oDIDYQyC24mPB;-|T-X%Lf&~3%+HX;9hw9@i+Df-7m#ah61?z0PqyIcLideg(|iXR2*%voi&)zm#xZlD8${x8y87|_ppx^O+pWafP875| z%g+@uo}F*<*CN99(E0+p(9k#kg;mTqzkl&8w-Z(VgB_zruQO*Qn7_b6dSz+|`K7 zt+BZLqSr;^TyZ6lnYOMkAr8%CLu2(*p&CQqO`)PXMk9@tVx z$1h^Pjv9PPTsC(FBtdJFE7w?+WPC};{|}1r`3{DxQ$7agi#d^SJMrzaZDUVP=iaELO_hpC4woIR z_Vfe5iG0as-}qY}f8bzK$%Ok7>mn-s|M7aEx0clr0TTU30q5GS)A@oVj|UhcP~ z!Vh+;wV$cqe}pYKH=S@C{4gh6maIv&93AeZqWa5x!3^9Mb$^pdF7>@xQE%Gw#wHc8 zG#iPURM}w;$_>ct4Y>D8{UkcyayQ=@^4z`7IACT=EJL5r*AW(;kV6ek$|;PbfD+kq z7rCHtx~38D<0*C09Aw71ypoe{S3tMTx?4`NGGGjW7_2V(>vC|@&8~gAd{G0Snn&*U 
z?iT>9+pQbDn+*UaAxEg36V);nMfnWU0}!3V3XYIEU?8$EfL%eb-BsC>>CFkajIMS6Cz6^`8blP=(pZ>*9+Ks+GEaqq59v5ifcMRF4W;A?BMZ zfC}7j(m@`Hs6o&e_HWxBL)Wsr?Dq~Sx}!qx(>J?59$kxb*>WbNX-o#({gj&BnD>p1INy%2KNY%T*|{@MR+pJ8 zIgNNX%D(Q#{$9)v2^;ou9ph)Gab~iyOJW%>>NzDmY40CXQ#YOt=W5U}B+?ev_!tq! z(O}kYKUU9X{mswNt4+`@R`9T9BT1z-Y{w2}_QqiR4u>}tkvz7m&HDUB3-p1eR5$G} zs7+^?f*B#|a8t|1*6s8t)Jnud*4ps=^-s-bmS1o!t{gCE-fQK@6{u$OwG<-6GtW^w z9#Q7U8H-UwnAMcqy8f~W*YbK%_H5Lu($L7*;Km;R4s0)Yeb=Slw!g7J zw#F!2Ho24?45cBSxJH_*H`J9I`Yz&Y(w|1NR_ZLBp7{S|?i}y)rv+RyJ#xaVClhkz ziY8yYeRR_1(}`;P7HS}sC#yc|Mrme8y5o+Q_*^wkM~(8RBo#Lor+zDDq;j++1cJ#k zF-dN;Vb))yKDSu1Wa%!q9^J3#L>*Fli%lfts*t^*jjqR=pIUXA&bU`~a~$l8Sej(t zqTveii3l-Y-%g&q1@%h--w$}LZh*6Psgf^gDMVi6_HXjjql5azT9WkbESl~=eMe#1 zc3pIKF+EEdBiYLuzu$hvKzy^aXtaFlQ+eXYedAP~IM1jZ!UEi^@3+&%5{CTQ5pHfV zPe1svk@@AV2lgj+wcQD>FhH8xz5P5Upv33##`$Qk&%c^nU_*EQbbW5qiWf4p+;o?G zTq!Quc$v0YPKk9nJ*}VJ<{=P=01h~R7=9zcMn9dYSYi!1Um1%)_#+@kRJri8Ps)6k zWJY&!#nk4_m0e7EzC)a$SWm#qg4e`sHX=XXwP@`8;&9nk)9+gf1mcP1@}L??y)l#; zd?r6C90J8|`fu&Ep@7l(#t});wFbO!JbnXdbBUX=*#-R9{N!vu7QolVhpWXCE~2Jp zWm>J#U^zUd0ok-g8U47&&7P_e5NQNdQbfct{mpFFsB8T7we$J9c>=l2^$4=Y@kvWj z;qlpS{VL1W6YZLn#~}Q2^AAhq8a{^VB`j>bJc=%t^H4D#Wg9yF>LeJ`Y`6Ez3cte# zWzY==pptfb;+c)$ADV`kdpwzGTp=7)_9Sch~cF?{DvOew_6ygd|r8d1uW$GwZoOd2Dbb-TwAnb+a_`_7?8#7)dJqJFD;# ztJFnfTG{z3$VHnF$NA=O>=Fn(F}`7d#F5*n$0v~>BJJw@gi<5i3haOeYigR5_ z;o7oOjadgrKu26?6orcF1B&0cV5$np9HvPO%V9!Q-^<+M>#Abe+s{&pIVk=s_YQ=b zS(ZmEW&vl=B3t>*YA`v>#gn%+mDIq#v=MqHh_So8_5d$PzZ-1WF1?AfL4^T-4BZbs z5}1{sH5JHj2m(1mhpW~nj0HN9y?2BIr9(eLNd#?<{lRWVkqnxibHd$+mxa4klw=rF7w+K`ScOF-0K0paqCZg2{1US|DXL?!DYr zEqhP}9Xwvz3(q8!BeZzZ$ua_j6aj>#nyQJ&038fEi;CE1&IVUEK5jcj2L1>{tp@$M zh%pf`j}#2j>UQgv@Y3WqgmHx-QBN$BY9meSNv-?F*?ECwiBb-i2n9k3ObSG0LE~1{ z8ao^*Z89Cx5_$1#n5YO7u-Teex;HH_O9M|0cSS+_ccW(W0aC#?^LZL7yg}t}(N&cz z@0Ym#2~*jy|24kow|SKZj4vLd0}iu|jsH&FcX(dp#>6NA2m&YXfuF>944Tyd3~X>v z@~beodu*%%$ch*1EK2*Ht`Fh>-hiFm*w#g+U;qhl<|!LM?f}e?JQ_M`YH9`s*&Ib~ zZti;9#n{~9`uh6n>gr8|Nnn4GNn)Csno3DY=@Uvink%y~G5NjYd~k?9~c-w zC*nSzEm1-G`fFweyL+|Gg8-!;a7dKT5?BSOXaHLna0WQs-^aN#_yh%{K$Vx~ItgdDZYys&wfFBu%B299i`1R}8;$liJ z4P1y5Kw}V(eE-RAi39Ma2kf+Xd0T<)5+Dl^p;7bjJOGrT@$vD37Ob$|KY#84@eh>B zX*XmrXL2p6zgc=x?$56kXKGOh!^-YGuufJlP7C-QhACEI;;4xw7Y_SsQr~&8|K&fm&I83 zu5YS31D9^0l}|RQi%0vt^(aB{HdCb*MdIa6p;TjwPu0q2|Cv^-^t`(M8wUf9Nb|V0 zwPtE>@;z0M;LvJ$XcbvV*E(VP?iJS$bSRQ^WlV5C8{a)NMi~yWYZij`JDN;B{pnS>bdH@(?iBRHml(h?GeTl z5-kbFp*7ymP>hXcTaN+-e}sXp8NH%U(b-*Lw>Xt$J4^TRAFfpqJShFS?f_@Wn)Auj zV1u+49*;qw`<3(5TXL}Erwf05eU`Iz_T9{Pl%QIFE_(XaB0?&M?_`K!pe21G#}|9i zFnqQpJq9?f2D4gH`|5SP-aMGetf10Tq)-)x2Xa;Ecui}{KEZ=0G-(Dkd#y0ZYo4l~ zpEPukkis2Ogd-2&t8DKL57Lw$jP?{9xcn}kKnsSQ&FwYHqAH3YvV&UL*2`u&ba(p9 zT0JM0@N%a&YQyXFQ*SS-w~gRF_&>An1(mZ)OW6VzftF?BwVc3X&lxlW3>6eZAoRAn zbt)H#sv9MnFB*X*a2x{PjU-v>KrI52AVczmj}D|IL;hUJ;06Z=+5+>Fp|e2a1hFee zks*GEUV>(Tlo||2_7XtwqPHa{!R;VWQjkhFuOZPSDFRe2SO$WUUb(me-bVmcm(hll zwVh;jiwC0OK(|690^y0^KrmpTPadpr(QlF+cIcqyn=qk8XrLi?M5s{i4rPfh3eF3f zN+A52kT`o1xgMm{ZAgYM2^|4u>@HJI(DAV9c&gY!;LPCaq~K%%2Y}1K$k_-iU~MRH zw->Euiz0~{NCLP85OGcgGXdRp2@eAdmIqtd%C-NrQ$5fPt1e;ytg z0k}`}ipqslLhvwyu3UW6_o?mR!IsZR8+t?>*aUoJDDL-z*#*s6dHd^JW%9+W&wDd zO#zd{8|ARc2Wa4JpRRS#V<7BbFPSRi;NS>i1JT>0Kf zY0eQ5_#LIjB$CBae?Nx0V}i*B!k}I`DD*VaA9qxkKVe(S&M{WUsJSXB2Z4HDg(5CQ z=xX87Nj`kc8YDq70zne@1B;%`LPu}8=JYkzI~Yubb}eRLK=%{{{`T$&P_Ei1KZ1sb zqgDM^NC}0z-}Kq!S$Tr)bfKjN8&sN~aSps2KYab_tto~*0j6KGG)lzUYu)3()KEzqkOh_MxIp|5GP7z?kn_kcIO zcOl=nKizL!zdZ*diGs*JUK<=YpIEYbm=@7jUfdj6YU*ay)s1-+?BFKbcAkDNXrw#S z*IlCb@(ZTE`{L5?b{weGmHsoF^5`TjIF|`YM`2l@DNB0v;uF2cz$F5RVq4zzK(>r( z^1~(6=W(_6FD`=?R|Op^a6bwBKJQN}lnxbS1MA+O?1RLlS8+QYdE^*xERT>9BI&|x 
za+O~Xq{1+hKN#;H^S$?OjzxDA=^3iiz%Qv5Tf+8m_gv|=m3fy{WBfmH@fyyEDumlh{U4qUpAf3@EYbf%vzdR868{Gc1dw?c=VGh`Nqa+SP}wzDMT5=g-EeKyY)n zWk<~kB@8}ITY0{1@}3i;(L34Okd!uw)<94U8eC-EA3(*jd6DXF*>8|Q(1f2H{u_YY z&DJd%W)#eHc1aJ3@R^?MS*hW|==k!tw55~Z=;+@49!ygyySP9<==e*`l&%is>fsRf zD`*q`CVYCyCsLI30hvpPaLt=+a!rw>)%kC7ttdJGi!zdPxF}i;SE3D1#L^R0AXU&- z#r=Gf_1C`i{`i+d2jr0q_J|+By)%`B&Q0=+a_u|09%E2)VRi_o9)BZz7nrN#W3*M3 zt}ld;FU`s3a5p%poQkOGMY#~2>tcER<)fK&TUHb{p-40XI>rV3`3)4?we^pgI3tc*|E5|%>4BFE#pJ|){k^OP*^lWm;)I=6O-KD zox^P!=_*{TQ)!rJSeKnluQvVzV&1O?$Irmd`#frW*=i)+%7nF&=+<^N;TCLh-Kt8= z_%D{WcC0cK_yDZt{30ODWMNWz9EYZA`*7Bs%om(l(Qh9Zbu?GrXc}f#Ka*|#9SW5A zN}$|TIXQD^5Y%CXZb2o^>~j2f!-n%+>b-z;NU z6qc=wArc*oG>a!Bn4@zLn=;LG0ee`Zff>YyZ;%IX&XU_C+ z)Py8O--}$+X|og ze6$m9ogeO5J2nnC`;TR=U2Eqp$@hen3>rSOot{h`k}3k&d>2e$;&wc4_Zw6@wB7-O zgxo*n4PgFv;Nsx<{WjC_ES9RPJ+h=70ZMMOwP~=|>EHN)e^CK0Awl6+8JRBTrymjM z!3?;F7e^2?wHn^SFomQc$Aa{4^YbYbzE;{!fl3pP@goZOKlFC^m8A#xDR8U@ZZiO5 zagkr?h9`$x&E2?LR>&{row^L)7DvmSqIAdc9h_;I2xLBe@4(wg_!7=ZcY&G30sMOB9E2C%p!panTKx}p4ktI{A?W!rtbu}Cy%s| zvw8d}<(XxNkaSNk8Bs|GEQezvQDa`X+l12P!kMM5Kref2)u$+N@c#OL%PO2$;L z_!Ju$zD633Mv>nvxUfM={SDEo24MK^-i~n5vc~d^<>8H`l z{T$miLpTP7XsR$TKGDf{cG%Z-2c`)Pe*=UD3<38v>7iK{PqtX7!N*B2jn9A!T z-#fj=BfGQg@N<4Qcpf_H#mRIHT;^l$A4)+6@X@xc3fke{LX%5~i{sJlE z2ak1l3n)0P^*Va6hI%k~j8u&bqX$xOwOdHY$P>7&rE&=6n3tgB3hg!9?SDOu5xHAc z7!3Xi*c&Z%fh{c4t|839q7dGjJsY|9W z^Eq#>te(t$qcWh@S$*?oGO*jo9m;nEfsS%QeJD81(_2osCm*IdEn(jQy(Yp(B0hg= zn}Xdr)}quGy~xGq-&1)!T|FV;nxTj2DdCc9#s-b&0F)Q2YEVYBwLc7d(u%=jK(CBVoLoh#-FSCHb%ZOyuZHa4F(9{YIAcR5s`FE9Ht-| zA)zaE6`AxZ7ceihDDOvOC3%PhBhBw2O_cXUG48q7ZBU@0_bw@s58^6NW%rmU`o~QK zN3%Gt@LOX;fA!ZH7qhea7 z?h*7S=;(}~ypVf}6?K@WBjk&F4u)!TOZy#T-yeSgw|S2b&;2=qPL@o&&@rK;x8j~_ zhxQ2@bP9*JjcBazJ@G0|#^iU(x37LR98Vs+TK9t#_9{s_50ELDJ@|U$beJ4&ULQN_ z0|ot0IyzHZLa_7;ZmXEp{#fPSYfNh7#`B~&e&$Z-eaEqPs8cZA!HmJ?S^R6V#K@G{ z&zdhU9=ayt9vohW)lZ~czSyOIo}+O^qDMiU`28=ywmgN$t=Fl zmB?}36_&8wOv<~*x*rS|l#97ISt>__4yE%u>>j@I$>X?JIFvtWXOo^tUJ4q___jfLl(NcAUyDuSVXKG$i`i^**JhuTk*jk94)#icR7 zCVyx69~#E+?Q{CKqjhBBl(7&ui0Mg>|E3q(woUX@JZg8y3gKAUsn;K$FHz z-SI&wHZ*q8pSRqs==pI&_p0a6vHZ)Zx(iW_k#BL#;rjyG>NF549JDb-No-sBWe6}2 z=FQ&E*N^u~M&AsV8@*V5!CFG4(Tdp-J3M@R4-)5db{ZQM38xf7@C)*2zY3qWB|NSO zDkQu3{YdPL^!4_KjKWPkMZEB1%lSJ-XC2pB>RI9oJ^CvP>GsRpr@G_)^58hbL`L7M zT?xgEv!*xo8Sx5E1-RJ-%znf7PC#u@QpY8I zWkftA;l%{ill$dE+a|1H4If9BpNA(4FPiVTC;HGL8&?1(`@h?FRyZ~}X|qscH9P}s zucP`{OCc}&lBZ|qBI(09F4t$uB363(g0>X#xY%^I)r;Z%W;~2wQ7D6wRNY$9c$~xu z9$eR;5{`?c_+YDkEDFZmBf2xk&F`ER%r^;j#L#b(a;CC7KrN<7sLg&=I&|V&^%_C+7U{U!NEG(bOPdCP5 z>f@c^VVo&VYuj$Ef2cJch>GIm`-RUiG&W8W%ZaG@-#oBC^SQb(zm2zaXta$OzJZ5{ zv1{=`dNL64+$H8S$jmbx-6Enk&0Pj7?1$aKqG?6t>+M-aeK8+rRSnt&8a5 zW2#Z4(qiQDOtCHhdb}~6dGdREAE;sbLkcdBWmk`wdva<(B?DVpRwnNqK<28h6n;_18QdJg zeNR>!^_k}JroplI;~tF^aFm3|ULzz1y=ij)*@Cp?ZUd%`J>`oRr?j!EK4F%0i7NQ| z=qgo5ON)zrIgwpVg{UFyE*D9OBO|#~SPlN}l)bA+!Vk6GI_p>K8TkR(5ZQ4Q`ii`5N3bgHeLlsf&0L(ULtxV7_M4S zBK0AgiX*Ke^2aeAAqvBtg9V~q*s`>j-Kl_N198PM`vPfJ3$UOlL?He7QS{+;^~qRJ z9Xv_^X%x5|y8r&_U?yGThkg8GNzfE5KKPX{)dt#d-E}0R!Bp>4QNk1~IC+BUzVW)o z+Nb{sfQaj{xyn)f9Nz?`c?JGCdGj83{11mG93>BP{ko`dXcBb2jZ zGGi#_O1-}xWOL9)y721;n*t-fJ5wEcd@ky8)*Zk2kwD2%7q+Fm$jL=n$Z(;-b$ujy zd-?n5SGa+n|M3$fruuzQldBNBmTTYh`q7R>1Vz&Ld_$WgEnqyrvrjy{&PQ=DbJAztE{1{=TQj2+-C2XJG^;~IO7q-UZ9%cwR?_0~ ze>l;^={KJk`$atOLJEa!M!eByae`+<3O#5RG24PJ>eX14&bt|?IBFQ8#xy{dWqfMs z>xfn1aWW7%;cIm01KQ$S#c*~(U2yCm$xM+`cuE96Fvth%;l(C=n>hIxm=K8@Nsyr2 zkFWxn9DYxn-H0{{&33ZnQjp`tJ7jV=)DEPI`_so&c)|q#Ua~&q2y6WEN9xvc(ODDEHqEK!5Rq&_|`E`PE1!#nTFAMqK zmzA2_bnKH#qe>zD7h#;Sad#U#&!LCqm>5rXeGP*9WUv;S$Oux7zm<>4RtLGbG>Yph 
zfFC>g6&I~9&VL7qvAq$BAQiK^5EO*0b&3mQW)JCyD7k*RL-2sMB(qI95?;>V=<5!v z0h?M0R**aXKgA7#M5*d3)dRnvzs-L4dG|hDyYC7)&2_04o6xbtVx#qt{`rP4Sl>z( zY`}1qk5+izR0}`(`+F~!-U|OjP4|61krkTE?h@L0eqt?c`vd8yT4b5yZ4Q4D3jQ4J zX7YUfOlxg}RKR_0bkfg}$-{xg4+0UQq=K^p>xjSq?&WLS zzt%n0Lyvb#bhGf!%k8Jvh2d z7M(eWbEZY&(zD;?+&(*)lHB*9XO`6`5w>(yv*t$l*HEYE{|jPuRhwzw69ql(`K>j^ zcSv?ZMaNUwecPfND3uCJ`m0y>ZS?=K9nrs0FH1RBMSX)e8p$nDGA$W37p|C@Sx1BJ z3u*^e03n0G4|aHvDQ-J17%?E5AF+ZX_7J@S`~nUWt{il~uEungmjfmSMhyi>1Sp7H z4efveRFk`l+vr&XO4fapL5TrUnY2b(olUj5K^w(w8MU!@ZAF~fO4dHbOABIE9<`?a z>a6?TlD<~8ZD`YZK^wZ@dBcBz?SI(*rR%roXF6UwVLDMZ9zG=|oWXGR?;oqaf44=P ztdVv`qXs~O$y80=k5_7$golxI4K%-*_o3qOCg{A8yKlN_gVOD!e1Bp-vf~y zF>G1HrzGl|G(5!*4BB zyLDlK=m3{p{QtjVb8A+NpNF!$ZqYMfsvI~x#aQwm4gAkpy|sx(*mYlo2NC9W#)yGF zbkyH$iGk4O;7X7|41xXuR^53%nIMo6+%6arpa%+wN6M1{A^!wFAh*CZ!HEQ_ijN8& zCYS}Vt!i$QT>-cKJ=h5C5_}8>>40WH10V2SW;-I{rd z?k>i-FWy&ylBgdaOkFy&TCLqANMCFIlO+03F-$rlm$`{d-Y@D_C98Kh8MxgowoG$j zN-^1Jfd93%kFO9LDTR7R8loHhY}3ENiT5M)uRYm&1mBVB(2X!FCoSko(j`t@El**+ zeGh(_`~%wHE9ssUIos(wiY>(%|J_Zu&h@inze;eHP@+QX@r;FXh#Q&ya!;cd`iX^@ z_gZJi1cJpmGSmM|8+bL0m+u40YX8qp-*-kr9f%|Fjp77GoS6vbaG;-CN;E_S(6^(L znQ?!cbpzJA(P+T>gSl!mf&cXA4hijVsGqQLAm5Pj!;N*Q1;A(C7($d2|G8|?Mc=%~ zSJPWJ)_-Q63XlFVB+IsP)o1-1hrXvGUf=04>VME=b^=4SBXWyyOUQ^=hBW+8!i8c6 z(S^%#)Nr5^$aa>Z?1{!ebpO}()ti(i?_IG*A`Rth1+$@Pk^3zan#QmHeEIcOV@^U) z?8hgsM7bE|I6y7`{Wgw?y*!gn-G)e&S(dV9Hz){$f@prv2{cd!-+YEsR$}15P_k>U zAQKX{lct?rL0Miw$xb@voL;wcgC6+`f+>Q+m~{1jH?M54@sv9}af4$AF(vX(!|mbt z0D%fjLSW@wY5;-}HU|iliSQE?@If5&25buokg8SzSq%7SFm$k#HQKG$!W+QBcp@}g z8#{bEz;kf6h%8}T)EE0q*-RUygMT4{PT54;#QC><51#{Ho9C%(O`gm)KJFzzzRl*t z5BRFVHtVrh|0dqvE^cG7T{CHmj^Kb#^LfRE-zIAd;Bi1d74!rvzQE@cMK=uDJ>Jfm zq`ENY#*SsbGkoTE^zHo@ERSc7>$R~p56R1o{vP@!5#GJNF4i{i@JfaYqqLM&;C!fL zDRPaCd-3?o5#e7+fb6Bq_BR6r0#V(s{fJ>Z?4aD1W7Yc_#v@=g@8vBmyEbBF<=zz; z%72zj(Z%*>wq>qm^@54qfu85&*7bDzgn(qq-J2)oe_ieiT%*Rz{r23(+hxrKc2fFh zq30{2e;RzO>Uy8TMg7ng_$r3Plh?gjTJh@5njU*^UuJ}<*$#FR)E$ehXRB&v{95x9 z7j?Qz-+?ObzAil>zgs;FR`?)N_v>`$>7CmVb4liSUyh?}-S?SCLPa^tQlz?+NwzNm zz92*&_$AhhHZn>$w**gm^i>c!2=pow8T8`6<~dvpIW!A~7mTXa6+@h^Mo1kXyF`Cg z-xl?KjPHnb@Wp`%AOi6{T%@bQ<6cc)F`H`lt9V{nHTDhNNZot-hV~-xRcls%TiL+L zZ{)Yvnzbjz!6dJMxG%Z7w)TpgTol-CduR8qb?xGIZEX!1ll0f=Q>uV7xv&SYs#+O} zK?7niL>9%XKRN7!W5{7!E+tQE$={#IVZ_Nq7ZTJB5kZND3rL-`zAL+Mf20s=@idjD zkVgUy)by@DTG%gL_;31eoK~fqqnl5T^4X`&Rb9kz@!6-|_T;%Rz17$`DbGkd=|C8C zJKd*VdA_~9zcmnO8B;HB`$GRuu(1?p_+wrwVyWC>MRh*BBc_hR;ZqZk5V1^5= zFXynDY*R?tR!GqX_7u&PwblYN`Onm^WgQDYS_F9-^~A#Pe66O1g=TO5w(xoHTlK%4(op*kviPhd zeBSr6s2}EK(xA)NQT=nA-9!%>T`ny9AlEml1phk?q9gue7L-b=7TDtY_rb%>qjhU{r&xuKFky-Wi&k~ z10ydzFSZ2)1*_%{J32b94rbduFY*uJ$jQl_cSa!dReA4d1buE!D@}Uk!fYXs_NO}= zU`@G+$;R&PE}4***qmQXMmM^QVhZvEE3E+1z<6E33-NN?;>8h@W%>xDJXLqO31pXtB%$p%DNZ(cA8i zo0|@Mlv3BtjJ|cU?DTw(Dp6b}Gkn_FOA)g+h?EIgM>rC!4a1nD%>IPF>w1tSW!B}AHGuj*_iuBmO!~5CKq=n1EOxLj1EKum{e{)W=6&{1&y=IUZZc$ zUu$Z!8W~em`p*&VvDa0rL}Nl6q{t14CLC`YJXt*u-vEn*mtDDQIIK6mN{ZQW>hgFm zjd}VOF8xfb=VMWNdg$XD4iiLZWcMv=Cmx&joyZ&)bV;Y=Js+BQ40+t*v48Q8YsJ8$ z?SJ=bvpCFf099^Ozpw%SI)YU}eSWW>ku$w9NIFGhsDi*TiAMJ7&)RcF>wQG{y5$Dw zY4JezBr|}r`!mD!XW|MNK2L%cwtR^vjtw_m54i$YB^|K>H?i)a2=BKrGK5!q@8aD$ z@XDY^_r2qTjxS!U`-u6(G^Z`k`e8L*>UJXh=7|C?+lxzr|51roOwm0SR{IM1-IeA$|v->WrBwI2pYoI~+)3oa^ zl7+LLHV=n0o?m(PsxxKhTj7tR=iyMBukJMJk-H^B{SDFZJkU)Ozm-Hcbu)pYyaV&n08X>%PX2h}67>N9awJzuiN0nF#h>#{!hNkuXusOei` zYPofBDf;(!?MQfk3*AEb+STU3odbix`1ts~cb8cV)+;TZ(s?s8Gbw!b337R}W%>sT zwFhB%%*MvX$IHzH%v%opetv!u5)yB-94>dqh1`#U?ds~q(;#4Ry4drBiJ2LX$LXq} z-))}7Xu5G_!> zxN&>2Gn&DEGj|Gu88$eq)!NJ=zy<1*t^j^urc@V;Oc0lt7F^7Af^3eU=MEx;U*3V}Rdu!OXJ>A4FC+1;{Xhk6k0Om$A6x{G$JOem 
z^_Fd3^sZgQSY9s1Ve`{niTVllpaLnr2S0UpQD%iv^og28_6hXz(^8{b zpWzl!8_?>_RV?0cW=pH*^3gCYogSdLTE&wNUsv*LJT~p-QH>JQ*%Qg#Y%=ysojv)N zJsu|<4_msN(!|^5CgF3eHQz31eIXwoD3ko{_|R-}NR(Ab!C*Q5<&V!bS!O2KW`#-S zdZMK<-BB;0(co0qFrRcR}2U$8b%rJ2XHW0B_+-iL;Ag87PLzJZ^YL#DThtC5)gF zcpbp%fajrRTpa?YfCIIa93imiyI2e;%u{08?m0B04&6l{Zj5JcRw2)ftw z7n*f_ZX|k)!>{_YbFn6ENhN@&NoGF%tnn zVmz~E9b_@#&l&cCt-Jv}{Wn=dUk3W$UxMb!6>kccQ$iVB1sYI?Cf4Ep}#$NlBr27sS9!CntH zXRlws2KK(P;170nZ9YBTw?AGl7HgIR<4bc%1e?&qvVr%X0sb&HHnugGXklhns#R$W z)DU9n6xN*@4`$1e9j*_xK?Vi}eD*82;oTcOKd4d0>@m||n~vuXibsoKg67LYQKj)> zD5%d(O78B|6ggv1W1c27vjtL#^kcYqnPNogH=wrrG0%%$ch~Jn;sjS`4dvI_Z_5aEFt-p7gauL?(Npr|VMi~2?BJ9%)QFOvPkq*BGk>}kTEJD9cca8w?G!IKH(UDH}*QuQTq zi?7-&#bHkB4m)7_~KId>lG5prk1M zjNg+jpwAbLII>;>i>Gng{V=Sjwu{*@VM$e{+PKo*8)*uxYnV&T+WXkp4CK@Y6C3eV zDb4C2=uUqC4ezO;%M2iTa zvJ&;}x?zOqc>PqS`Z!dYDQPI8kO1nt(yQMrE_4HWe2Ftg9uHJOxK-(Pm#w7QEs+e1 zt+4da8;w33X;*CG+6eMT3-^<`?(WW&uUOiOrj{Mo@_gjKK)|f8g;qy%rP?x~(zeBg zTOm;9=V$Hnj}>b6g(XH5M!G*px!tVqHHLo(ph}##W;#Qce+=Y;EE~ut%SZN4cX!*m z>Zp5b9e8t`sg5)5_TTmad4_iHs}#%|!7=X4tT(p$ryL+$gun}4vgSeQwaUYe+`*E{ z7hB7p9u@%AvMwSWzI#+i;sK^vW7 z`E~;UKV;vJ_+89p_OCALXP1WYh#S5TEsj1{%znSVX7kZuiQ2WQ1}UH zb~O~JaxJG5mP;bI;&!>{93z=fp%$s$I9chZ5UWaQpHEAn`lBJIh6eB=diEWqr?q4p zFF~icpv?OQ7*dP;kf957h&TY{1CkdU3nf+qIt$C2o!$z1$Tit9s8Wk%--(T?)`Pg>f~rVq4l4x*#!)<5=Q<~ODBYZ;zkgN z@6G8aG_-I=)qED+I@`U8yryg*NT>5Ve(pBD+MDc!LZ3JLV{xUg4(FXGa^C~)FeM!k zJjOk2!~yu#CifFNV`Hbg%RSBm3^Znsu)xSjtbo40zL-3@yovewd0<{iZ|@Ee>YM%! z`|jyM!)K|es5rg4`tj}Cx5!9#ovyB~T-SpcZ`d(nrbaOqxiG&;&-XMTcempu0e=3Q zyDQXmo5^PP6Zu58<#vB4FtcH6AU>Rcos9`QRO;;P49F6I|Beg|A!Mo)e*W^M+3iRN zDsrtjP9ugHjX}zxQs`O3_#R&8frG*AQ#qE_aJnIeCjPK=;)uF=-On~p>2NNJQ7vM6 zvxXu5hIZ{WpSxiEv278ttK}B1W?iL~7U#+D1RHhIQ6Zs#mUWjl<8-zZQJ>l0GH-o6 zUy-@I1+qch`idzW6W;5mjKLlIV{LFtR%i7dXIt&*CE589+(>!kI$D*tag}{*ky3saRTsXW$r+K4UEdy@)zX4ug3cgUAn7Rb2lQ&b{eo zi^0^>MG1*e>;(Y5&ArYz+&)b8V3m9OW{bVtzY+E-(M-VdvgRq9#Lxgm^CKwk>=tyH zi0_!e8|?aYM}gLre}a)N>^v0L@QdP_Fd2NryYYpN{Vn^^OQPBnL8FCB#JSqN5Ep&F z?1!FtNGNHkV{@fVbNnOr`!k&g&vM#(&%Pjw@(DbdQlT5)n{m$jN1Md%-74NK(JwnC zu<9Re?!LXnJ_(@;2O4Bvyl(`fbmEQ*3L0I$%23z}R(D3CuJC<&=>NU>7QQyMse7+` zZ?Zkztd8QUGuM80oCxh&R0E7{&?{>4V4H!2ApFgjc1_5QLezHN&!T58o&ehw&57=C z;o;-pcij+7^7lEcq-nkn%{4YUaiC$cYWMqRpL2=~AzxYvyZQXlQ`!Tv#_*?l){y`o zMOL$e)|;FBwHblPQT>)U%GHIseE}y^mYN?^ zkTANe6^H(zS~z%=gv4PtW!n3em98{Z?0zva9dn7K6l~2;gQi~@Jor#T!K z%YmW09C#eB$||_)r_T(cy}#}DbgGhcV5M+&>Qk|;Rx!QU zi0Mz-XV=^NW|q@by61^R&+SWSA}z7hOGK=yobh z7fIZ1q@p-iPfy{9E^Nvqgr%%3{m^E=NR&L@_S~|UZ$?nOFKi_0dj467wAvmaU5G(N zdtT7pa%*{H*U89n!ajZ{FnN_wyVg=AwU2$2p!Zu1sK$1&UvlY?I-3mvEynZ#Ulh_a zLU!i=|$MAYn`LbTjQvmYbJ|EZBF;i`KYr&}Yr%=axM|O2$Q(RSgDSKIV zRsPi4de*ILxKo@w1|{~AE4JN9?poxy2QorpkH4)L81Yh6L}Gp|$5;0!?I(MC+G;Lq zzk>i*3lJqoE3HD5ltuun0=Q4v2vSE!$MN>ek=*yO_^dhzs}p(h0JBo=aCFiNTWN6I zuv>0Y=YFVGJeVnsBo%<8@69W$q{(!Gvzvjs&auR;mt+%@Zvd~Q=DH)kI zpW6u_SPaB7zj^bfDHL$-Kwip%FaJ0rVt!bf7vV!_-CS9|xt#)v^33HBo&LI9101*!DnANQ!aiCQ)t{zh-cH#m$VC{%ebXBzXZ zn<~*9$P0_4P*jYm6uzg_I96uUCNUag0D^|*d=1if)rKfR0-Q1mv*YjHx8(bB%C}xY zJ6whj9@?_=Y@0`KvZ2G`|Cf%>UO*>j2-h3;; z%h4yl(sKK9=8u|MxTbsV>l0uSdjU$hWQ76WB}t5n)7n5BN>RRN>XS{R%fX9+ve@cov_4MUd-uf!$_fpM#I{RlwuXtI%Wu`4$P0jQ7 zH)ZF&(BUNy0kV!Yj{V)b{_S8Mr}W3m{jx@#yekEAl{Q1AO(1{Y%4M#dqK?y2Y9!k&+*Vv9XZ|!LIpj7XiY+3o0k| z1y-Sq-HpO?&z1V7RI1vO>G)A+QP$q5Buz~5lu$EKz9j+Q`$NTfY@rKk5MO5IWUCu(Bck7=4F13SFn`dt+IDfP5Yv4vt*Zr*G#%-rSFlmKOXnyc-k~8yjxGmGt2e zZT|sqmKpv}x7SCDKzYUEa`z)r&SAYi1w}@wZoQ0*jNe%|fms+fHn!;fVKoK`ua=HZ zx~T6$fB$1@Id7rdu)_e;MMG1I#@BwXaZ}&!!;k-al&HvB{ zaOGHH4`bzqV*dWm0A)9v%u{UA8_^`q$;k<_tX;UgbTKh8!7KyH?9vP^A8uJmF@mXl$b57Q*Z%-hZ&FTf{-?^ 
z`}G0xtq|tCk|y>g=PSFhqmF9B#-*EUBmUc0p_DJOnU)Ti#U4#3W{~0J$h`gfy;Xa! zQUuGO-&kjzL2Fm#`4J>{tza<`0Q~-|m20VHcvo%w7Dq*ev$_gl&++3ZbyXZXjx4=1 zD#((9u!%mSQbB>ArL6K*%8a#DfN@lA_H2$K(~zHepjnrSY<}CsY3Y#-gfcI8_4Anq zqq?Gb7*az1jICs%Li>djp9iaSI0X_-(T^paTE1PO+Fu+8uShF??B4W!;IOq#MJ-EX zGPC9x%x)}qJ*)N3dtHmaQK(8~Z`ka;;}=u3aIfz)7SGQL;@I27aeo_^4-u*{f-@S^ zmYyHwP8pPRGW|^G#*2xI0jg19QJuht2zkWy!Pr`KiuZ&oLL6su08|U~oDQ2+Hs(je zs5Z@yMIjjTG+5AMwuPU?vZHuEmKDacmss8e_-sU^EeXWXC!U0%QK>HrKI?VcEZ5;Q zdu$I8M5?-$-~OJv^ck4wnORjVBg8Pk%23qX+eqOv*vluBe!Qm{UcJw2_2TEMGCqb* z)V?$%J>6|=_@!*Sce2s*N?15Ccbjug4=Kea$)B1fi4(SK`e}9hW8HQS>9>pXA^|D3 zc-_=TbZUHdJ|5aM&ieY2plDgIdWa1bYC9<#k8$O9Ef#8gA#Ns#5&N0(N=tlKDciVQ zqc2=BU8_Hn7Of4PUAq-NEe!PH7rw%xz@Hil^}VAB{`qa3mctUxg*@Xr~?y$(-;VZ@Xg?G6Bef_R|w?Zfz4eELJp+$24rmAOFDASx4 zU(PPmL2jmUALE7jus0E&yUj=Wr24DFVf<%;Q_e4cHmj2KcvW9zJbdW4pPeWh+7o3r zL1i+MK?VaR|TTBhHO z)bZz!$$D2PfPokp8ENHna&n;FCnC(u#o3`Mg}4Egm2W^46cqCEgTUQKLPqZG>A60E z`u9b>29ilR4D^<`3y2az z9y%vlv~iuS(iu9xtc2SZYxvZ1Zw7kD-)y};pDi7+_f+s*pw4utl5@J4!e2;>FXFbd z@ek&Vd&S>M=Az`Fqn&kXT)OVri9feT0%qOqUVpquovI0s^>Cg9lXBeM*@9IZt=O3+-~YqvYhX@G2xkQiD#>82=}kj@`&RR>P;sp`ly?E0oUH;9g?{V@W2q z4t1n7eF_(Rah?l9@QaD;FsNz%xAEbL2*h8Or!4nAqVDS**JX-y(k3yR=bZ@LnUq+< ze*8-oBXJ@ynS!p?Z(-kBydNG)m6xSG3-aq&=C1E~OAPm1`#;#u2gsdVkF5#_2!0$$ zsbs{X*|sXNRp9SqJ(EtS9{zx%B2scBAkAYVg&msBU#^7VV3RDaq?#WwL)Ry-sEJp@ zAs?TbfJWGGuccElmE@@{x9m`iyD89^)SY)dq7iTljEt0xw+sECii32%f3EiSKUV6U zyoH70CvU4lT*V(uh#)N?ZO%^%8@;R|atird`R3vhhfIYfyS=P(IY?ThcGFh1>QY^A z>T5Ei(_S+yNULQZx?nV=q)fVWg0nT`y1UyM7^gw8-JJZ;2DTA+T(vA@_zj_J3dhrf zr}u@9(S<01Ft`ykQ=Su)KMa!_N-M*q9FdDTHlwwFLze6ew=(`jj-kL)h~Z##Xl%dX8y^Uv;?6I%?eUzhg9p~VS4j;~J5+-5LD z(ung3j&!iDPTsBjzgogim{e6K%iHecV0TN6&a~k!f5@F1m)@B4>{f+f;`djSVxZ33=HscQHFwhf46sZwNlA@PTXLX}A3xs1PG(u} z_X;xVtf!gYycq|E;?GrlVv1*CVJR`C1lH-oBV(!+ssU8Kw9N-OxooQ@AmD<4sLf40 z45%kPgbe_i2-MpKotNee^pCSSpgL^ z9i3#kyl26G1yDaIES8iOgIkuaZ)1c$SA8)8=H6mG?G*^f6j&&VWT>8TaVH5_%=%PZ zM$5p$F;yi6VEPyLdRG*>+e29WwU#`eCi!gq29kJ=QX6J}$8(WArwQ3VQG?AEhvtPP z(UdbYzuiG@)^Xl9E#Q(uQO&9Utzo@bhvcX*S`GZ{&IO|~1tA$#3gDi{8wX-%CfU-V z5$AmtZaU-{v$bi@FjumTzTG|H+R4~Ky_nW%j+mTrE1r&L2-E>M^yDHO3Uy4f?UUp& z-sjs|GQM>j8WkQb5m}bx5$5E;Bph$2-@ze|LSF8Wne1>)sKJr%KQD!{gBV3aoM!VWCZMYvV|W}^i?EYh$ae$b8&@SStq2vc z+s^nO%_c%GT+w#Vrk(2iKZ1>y*G@-Av(63enmZYoBmNo{^2*7F?e0tbjBLIi6&aXf zLm*Z|Uo;p7s3X;X;n0Aa#yw|M%HOs8APKE^q(0-dqcH#`Xt@(+jAjUXRvo)~cr<9~ z`6OPrLw)OA_9j?$>;B?Qpqx6Lheq=D`7=;MB@ywe0aEA>bj?7Dp1`VGqf`2U4kx^u zGieA|EcyF4TtIt!yPY2pjIt4S0fH5PvasE8NthP^D*OBUY2iOI-~wqpP&k|Y!BI+$ zAD)w)-Q=Iy_kK(V2ys0bK8S9tgSSeB%?xVW!7-9Py6O3BgmEWzBn{pMCSZZMFS_;m6bu`H6$ zPfPpS*^}xdiJ}%|`C_d+%NPlM{aa4TC`@D<`e*%;N44w~O;Ud&F-KHZJGk4>6+nh6z@7yll&Vm0MFiWyKA`>2|TzxFkOcjsl2vpBJX5cPY*9x977 z!CT7h!-RT7_zA`%v|&P9AbTmq=8znW>I2mD9P8_>h)|r-aZZ+`)FJ#vTbUC`qGIM#mHslDb7CZW7LQ?A)?N%$ zGZ=L*3eBp?wbyq#cQTMOddW91RqG#lTA}aCp%g(AYH~UWG1JM3-a>~~waAxwluGXv zvb-C-`3q0z;>nDXl49d?zUF-4B3WzFv(AgwS9R8*>QmNmSD0R4(n)2C%FL^VPl-@d z0ZrLjkQ8%LPiwI8DGi@e-Bw!CAGB#Q#Q0tPWIAp*U;>U`tp`{(l zs7@CQb91`YuP2VEY`I8~>8gT$7s;^jDwCxnJ8`B{40`yXKmQc?vBYP;n|XcQt{2LA zlMiMctb4C26hTKV!qZ&WgX}drJ4q|%vz>5H&;b(vvZF0(qJb8q!I$iZSd3?uN7fkQ zwmBf85qm8=+s4!`lq|~hxj3R?A}GY#ysoS@HD?pq4Gmho0E(+PewYzIJe*gY521Esdl-;Gtdw}C zB_!~E`u#OK8w`)6rKOe1@0gwc_>YBmEk?el7hB@=9;~Vo80&$rv(p{hd|JSQiH?-Y zxRM`V%ZNHZqbBKc2T2wrX`SK)`afJlvC!C_w%xa()ghzH;pL~Y1n?IBbK2PwS@S(U zkhT^F76X6>8V|;ju2Ker;xtm};{@gxv@c>+%rZMUsw=n0Zwo!o?^aIhv4@STLGewL zO3_H|e;Dj-h_ffE-{)F)4E;_HuJ|l>nK<)Rvb=6Y2rfoFlX0@_iH*fk%ENMcFR5jM zD391=_6a^2)D7#ZV)5*YKj`tyqm+M-t;t8P(0P`2zSX;;HWWlrCycGhJ~%n=UU8qX z=BQ)IuoK-X=+Ncxw*EDv1C2uiQg&k+U6p?82hES0cR}b?3WqEzTJ#VcA^*s0P2& 
zSbnoeMMS+q_Esq{tYqvv2@?Hyd9qZWDBYp(8f1mmPJZoji(py( z1Z6qgVDPRnIlztJj{2V#AgN-#@D(hkRz$ez`jd>_B#4vx`Kk0nII4m(yg?T`Wd6V; zk~yPW4z)>1eqKj=@S>_ah$K^)5wC)S=M!alw(Vl`XigBdSB(wI*A8)REe2othW=A> z2+P2XV(6wEPl~;^Be`>49xPp2PRIS{($V>{vcwu?Pfb!ShLA2LhXaASe7yC~v*BOA zT}&js!tiB-%|C@{&Xy%&AFF`k$d80Pt)*v;V{NPm3Ue+%&=}f5b%MGgN>-AJ!WB>P zU17-LiL(yss2ySbWm%nDRja_EO7n}wac?~|)VvVOeC~-4MAO|XOQaJqy?hw?Y0-4@ zI;R@1`})=3gRF*rTfC`P9}dB=Z$1dQ`T0AP8=8&C#V4cEa)w%_#79uKyEkcphXFAl zc-?OE4vzc3ydB8i)fWHybg#=@K$XL8*w*4^W6qi6ygek~eQo>hTPMNoKMvV)>Zi;3 zpA!pvZM{@@1=#Z42Hfnujm05Gq2e_cNIcf@dXdpEXd=%~H7nqOIouoynRLDh`9qLS zD0IErRPN_j#?Z`y=y^L&wY9lJ^uO}g7`J)Z%J7f#i}PI6_+Tqrb2F2nI8^Ky+92x1 z?RL5T>T}zLDl%qj+5YO<>a}8oQLcAS7dC4&GQsp%go2Mbk)EB=^M`aIO>Spn-v)dk z18DL?=J$7{gi-FIlDQi4CdN3%JV*&MU#B`@vCB)kfmAuyZSYMW`~CI&lV!n~0{O!} zkRLzCh!+XLG7-m+JCx}FZqYXa64dPkc+4cnuwm{kvC8=hJAsmLk{mVBf}_JAd2jQt z2y`?B%)4~tZ6;|`?B$|A3T1SXQ@1v^4`zcnF-&YII>&lR_r4v{mo5x7d!67_Ki*&o z+H5>8IBND}Qnp`lXzhwwKOto;rzJWqM))GT*5fjS1#e8N)IFt)MyM~Ld!4W6y>Nrb zVD9~XFXm4BIUn?MJN15R^i?~n7Hp$#MANSQrE;C(aI4@=Q~T1G^Wc{vzJ6h!ooji& zl;G!<b4&U3@$Ewte6;<2QR+eR9!5 zYZ>#$B0j#$QUbDaiZy-w4vdy}!p{`>W8R-*8MvT(2H)2)%&kn0NgrD$&u>j;1;p-8 zwwW2tHpl9$dXIS}oZTqe%Tq$KrC*IZqGr5#q$^knA5gRD%I?#hpIBFS`2BRR#93hy zqPP^LjEMeQIX75j;6TiJ>a#087InM~>%!e#()$aBoQdj6Y;4u8Co<7gZYi7uni=KK zo2MZYO!D)4uX|x``jwWytJ-yzFy7(+9_m@nQ(#|=vu}IaAcjT_=VXRqD5%0N$(ldl zzNCt?QJYsgF7zqrPie z2%9ns-ZKBCS{^h&?|xKo^P%sDo_LhUO_CJYvLRjUd1;dE=Jp5w$KZt7GWq68x^*;| z3LZdkg;MdQS^Mq-?O`)g={|AM5@*RgHVfrx!yC(Dqy8E`-T@1x^7J^}8^~ZhiZ$1l z=gq-1;(uk*|2|T+h12_?IP^2p!^A(#ml_4QzhiZ`9=-V|(3dYNog**4X=TLfubE(*v$Jak60 z+oxgaJ@sP6`nvnO%9lk2nsiNzE$*kd-bERF{uP(7Sd&*@fJiOI`rv3CiX1lCqoAKG zpq=87WZlTWV`i+9(6TUE;^)h4w$h^MzoR*8--{~1ihRFFyWC0^ZD_`VUU2=zi$53I zOdg{`Am%j@*Z8HGi;ta-aj`?{^K6zG&)N0CR@9ZDxK=QJK=^YeZ(WdXruv}t=kI+t zzf3>&Pez8SI*;Nj@+Qcz{K^xUw;F#_SvbKrXZZJQJ`yaIvS`sh;gxM#(f-a<5Hb8o|Nj=wnCTIaluV)=GPWz-OE${O- zcA5a;(x(u7h{(&b#m3?C5|5th zAy%z>b$_?D#fS?xR|GV+@BHbn3UD5*5-3`*+4bRcv{jB$1?|RO#Sp31P8JKhJy{~7R(VeYn$z58s0ZPMsBUP9fhyW2Z3Sxr8F4-OUQ_P*+FNE;gV)tM z-(K8Y`6}}(PfpIUM^0g&{a6;b`isdC@oSpi)IQgIQ+|fRTEfP7u~wn%(JXvjqFdlb zXUamt_Y^JFN=S8kX*jBKJ-KJ_Mhf?3Uool+dv5i~V?EBYU(S$c?D>_AuZ3Tzj|WPt zy>)TMU6uh^ba!2?Vn0U}2N}}B!(EQuC>m$|W3xviN|{Zg{-^zIK8zb(F{09L3^-*7 zAylPwn`Q*r6)8r*xE9!uL{7%s=gv^^8AMmchboVrN5bG$yNY~f19z06a3k2|z@Ik8 z6&tABjvEF54=jqeqCr{9Yjp(OFN=774&KVd1LFW~8=4{{vF$TdFSu_SjvP)7;HE%0 zgqMMBcZ@Cl(_z8?fJgRKwaHE2)d-L@jkQvh+$*t9FP0+TVSOM=05~D0D|p9}4ThOf zU8tQAhjXeMZc08s-L}viDf1z5*{6LrGXM@$jEP;f93D82G4 z<57sS+^y#QI(1pUTna-^i{SuGExkia7ng)bb+j z&XsY>?hX}EEM{3cVM#;FQYfN)PX4wc=>M6foj#?+X z>NRu3_A9bqJ45*pe!%}8WrnT?c!CC>QBpDt>c-fbVb*!&AfSb3|WJjz<5V6G!VbS(osz6;n3jN zeLY}Awr~sms#zwj?q~DGt<|I}&tAic!q8nI4iIE08UUp}#sfuxaM*3gFEKbFd<3QL z!YdE~fZcbgjR1yqU8?6^da2_4W!I*tQTs^J-J?9v>qnj=dil$5z%&3(R%L~}DzAy^ z{(EO@;9eBth@o*eG zymPPqf}l=70E@`O?Lu}cMLb(AJ=LODRi;wu@V@@lTgs(hMRx>Jqc7OJw- z51zvzx-BdxM|w7V>RmO<4Jn>@^1ip1^uC#WmZfJJo~pwf1Lxa4)i|Lc+%2b5l6Ygr z+5zscqiC}DJ!Uzp6#C^olre3mle9HQctPbKPP)+z^kjErCljbikzb-wX zW9$n{3tN|J-t-kd=#awwgug`Yd0{D$o=(b%3|?j1&6;ObIKK-J|2d2fw7 z*AV4u8>WlIftOEZkH3F~k;b%3Skl49cpuIB?h<1-9bUcLlUXO1ugs7Ho#H#iaP~hu zV84BO0Eq$<3lf);=YuQ@x5hB+jS9D^rdN_S#$aJ>Dn$3-{Duwd85@(HSe%t9oUc>c zY)2ijFRL$aK(CvFFGS?o3i4ORvLBTXg?hD`K0BgN_u#^N-%yzT>ijDJ{<_9C#YUL| z!xS?C?h}+_1+oBfhX;(|Tj9uCH=d8UM<{~2f0w<n=EvUcv6|!Hs;eb=ED0{pOsksQx zZ(fr}oc9KI)~N=x;)92`**^DY@9uh=^M)@QZ^XVN-7~2{Kvv)QnXs^~Ii)tP{l=Y{ zy7aKg>xSE{`y!vXqV^BA7r(z3NlMEW*mC>1cACDN<*|@{-l1P0XelFa^8ePsPg@~z zq9gft8u=Q0GVA^RxtET+e_<+xxj)u-&>%mCi)bn_FgRS2k-&rB!B8mZdF*Q{W+5g% 
z)gMJF{ibV+W$|JXrkp8B{2Hfpv2O+0*(v!HuC}e~y#2K_@l^gS*?a>B9VPw9QzysG z`Sj+XdET{tHo-1&*V9f-bRd)fPW+vsvu?N3ns`voiH;7b+zgSpwY}&24t%9jlSJ6^AbTJb8lKg~ zQ;T5>FT!&6(_^M^C7axhtmJ4gUw6kOuPGH#(azf;8|`Kgv@vk8k#9e@L@UHv1Y`{O7PWGnv(Aq&u{=2Al|Q3%|Go7RhSY-etBqoX z_nT5FlHi?cvt`xe7<+IU1Xlb@O4}usz3A6~Qd_Yty>8s99Tc7ocl%i}AOLd~nMn?5 zp4#CRy{J=NEK&Etoca-Y9zO@+4QQwWhTwW(v`}Ot?rv9f-o|AZ`N5jv;){hj#d=CScp@}&xxe3CH zZriKBRTpXHNF7Z#y25UkWc?_XNc!d`gK1?W@XQaSG zcM8X}pQR}~sF!jwyh@(J`3jzCXM8NG2D&P1c>);(`J3eL0{-*BlfLs`_TG3eeATCf zrSjb8$?lP+K|_mQR>E3$$3|7NV1IN7U6O3-id!wrkMcyE#A9Te-pe>qH0Fi3*;SlP znK4nLVLpz3l_!#O0(O_w6-*V`W$X@5i6?6)xk82rjJG|*Yc5itG)G{T<&EZ?4^ zAtuFsaIitihaI#zWgr3+h_>)ZKL?+WsFG#I)~M>1)qO}L`&Z|Eh7TDlQiCIbB9xcC z>9+Visyp5H{|{qDt*QTvou>IJ40!SVB}XM7RB7;IfeKgI7gN~lDR4CCbP|=9{iVps z@h~46-`+$zZLZ@({d|ljX{q(ISN?j>|E**T5QqE6TBIXgetgq0tMWAfTdh>adXvp`i&EJ`y zZBtCa?JlpM(sL9}*P+A1?vNjOSL8p6AisX}d0d&z&n>>XmC|ar632aAU=XSN1(zb^ z8CQjOS$Ohz)j5J$uREtPSt)n*ie^myg2Dk)M8%8rEJ6~hrnnN_FmRxd1O{T;m*H&F z0+mu~!=``Wfp-%mh??7VK3{f|1B?WxhYRB1@MmC9f*%z@pg`2|Eo}pB6~%A< zZ4Z_|wLPo1u(jCMzbq+7fx`I)*f4{j64Gl-*XNGN?hr{|y#S8w<+9z+$>{XDi01&1 z5A&i{s>6h5f;t!|H6gu4lVxV&LUbhfPy!gXp<=-SNmCZrTq+wcv^2FRFWhCUMSB0c zRq%izm@pOq*us-Q0QqpvC5FgDo{amyTY_8d%D#Vv6VeRmxD$DaSSih3nDjY0C zb9#(aj(>b6LPHW62j(ZfbgmH*SD0nSA<-daN8n`8Yq1P@=Wy3u2C^~pic{28p660a zRqV`;Y2}kJd|m~nIJdM1c?o01}X8c%2no3WP>TGz9(l1(=jz z#Bs1gd-}4+4j*_1iz1$E&;L@%5`ro9 zO_^L>n|_7D9!B?$(_Skg)j=7@t`F5fJhK*<$kHufu! zwI7Fm)-q696_s&o)}!v&)Y#B?fQ4_EUNa24leq}5t0*&uof=&x&2ClhDYlMwK4BOp z&A_au#u&@k_UB|)b=TsqFfI28-L;o%#|@PM)axE@LhwAxP>I zbI#=NjRwl7EK~e&t>;uB35I%#{|8-jg$F_J7B??e;FHa>3uD`u+BRyW zms!>!E)*p7x(`!qMzev&0>@n6kOvswd~|tt=^9MG3Zr5Jp5;$I6yn!`!TAVP-w~k{ zQsui0GhIP#!`DE$*LW)i$M*a!nhrP##w5d11aSee1pt1JCa6xS*@y%PYD5c->GKhr zK0im`h6mVT0F;fwO%Y@+)(wkEpv|BKH0%~#Oala}7c$>pJc^$;i2z99wdasJ{L zWwBWxz+L*t>8dM~GTNCCI8i$s=+MlNJ}3cSvc8>Y=&kP>g9ayu6`o?JtZrfD{PRQS zW;w(*0S(mZrkAMYt7zrp62ZDQ6@ROTAWFlGD-)SaV?r9qOzW+lM&(eHqQM!-F5}IW z+kCl%sl+(0%<@{<^BmROq>YZJP!q|nKqHh5bATqTc`hkFDbEjYVvhM$PIL@?jGHxk zYhsWfJ0&(F$-EL(zg*w%QmFE4c3!zeqLY_c&nJ)x-gS^CG-XQ-4B>@G*5aF`n z{YGL@Zxww7g?EL#m}K<|l|_Epb}?;^4j*sz774-+LAfTW*ZkCoTaF|uBl`jZ4D3N$ z3rnoHe^h^9J0{siM?=hlxQOUb;=E{cXv5n^)`9;8!vQD|j+rcx0W&9ZOEJaVl3lIO ze?Nr6O5|nuZa@&G14XvQ2e2L`lCB1OinhSj!XTiq8(AlPG2?$b+5cFa_cuF!wckHD zm^7~up#Uj$ubzP=SAmMTk&((Wu_)I2#lP4`%MuCt;m>`=nGt{-F|SAMs4y$KZL^TZ z%)xmjvJsZ4pXtPmCClI{kBx~sX8@9=wsTE1jR2~`ceBsfh)mbH0)KW7!!mDjyJ4aX zMSJL>%}@I5^K;FH7HapYrjX8oY!lS(&~MvJ%rT$bkK94qjn@zD7 zKCZZ-Au-zSw#*G5aE*EAg8WA1hMie9EbJxLo{+Rz;EPDqa2++yHV#)B3EcOMsvKeA zZX+Q-`Z`3Ji-Loy8rYcy#6LD?s5H#pl(W}WyjPQ7JpUhx&A+~SI7#-V>Q4t92=@r_ zTa9ZI%ww2;Ti|rz2%Mry+H!I0IrA7P7QD7z5f_?Ht?xG6J;pKYbK7!G=okb@mFYnh zQAEB|wX$-E&G}po0UGz=4H|2GA zS>mQ_`%lgFbE)#-_JVQQl$w{HztwrCNuof7A+|3G98B}xUmWACJ0y3&T_JTgShDAU zc&XJy=E3K|{nqnbA!*9{IIIFBsdYa0%hw*O{+h6D(%8Da{WTtrFr!b-lPonM2cRo? 
zt3c;pk;+95YK#Xgi`^W(t@Toc=|qx}qo_@eUkymh5d8Vjool!@tK~v470xvOl%sVpyU} zGU<3#SMl#Ce<{jlC^ytMxzxvD6kYBmY4=}^IRc>GEPtww5o7PCdSb*{8SiRgX_ zxo)3tcHH-Q*3SiQ3+Lq!J)UD-&kq?%^SQ$WwBEwxUaM;78=>>=?9vCu4G+$#4NflJ zO?9j`-xhuDLOZpExz&i@wl0_)Prq6oBG>PFNGkj|UK_xiI8qfdYbkVRseat!9XB^k zM{O>CGfsob@ zhZlxD^PGeOdJ>Sc{0Py2uLz7K+Qz~{2+KbTWH4ifq9ed3PMV=V^m7lmY6Cv-acu-` zOZGWe0DyzK%m+DuP}^qm-Km!J6!|1y#Cszq#JTdnFh$OXF#<5OwwD_E8s;e10Ov7S zI}roN5P;%k0(w2%8xalC9<l!n)X1)QW5|CaHRvp&pD!io6f&+2GC?{RKvw(s5TO zhR|XhqW`M};QtqU0?#wNLEgYcox@@mIMMI~KZ}lmm34S@^bIAYUoL8?(D}i<3rGb3 z9Yx|*iOI=#H#dSlkN2RpHBUYj1+%P15{)jl4gYIJ?rhszhcs=;s5fQKI z41$%aWRid3m_k>HclQogSIDroj|}Ko2k5c(q)h{ZLqnOr%(~T(Vrb`X&)xs{fznrL zbNtpP{gK65Xz9^VR06${rh@-(LLWspeK~iDNyuJ38(FuJI;@*y_-SK*W7fx11o{EG zf<=2OQsZ2%adCN>K>MIsZ9nw=z^=c$@%zkMFVk|;E%}t%-%-n21&-&+$?Ix^q`4~f z0YAMQ#`|Xf=v@u14Qk!FYown>gD&p+%_`O>>CwweVstZUD0U-_qb%Lg(jHwyr;saO z{^r7wk5rN%c2HP)#8~|W@twRW%!RQku8$5n8!q>x-8*ehYLY&4zhOMhB4F(zn~bYM zzx;H5kj&ru#C!CZ_Ap@_?J#`EH-zS97>If0?X2L)ueRlKGSdL3W7cAX-)2MBf5#7< z{}5IpPk^_)#6avJ=k}_Y15pD2Tw4kL>{xOl0=Czs8&2P`%6txdc3+R&gRSg5#=M;Hiw(t#g|u$z=SnAA6U$YBMJfJ6~y zD9SlUikpGxz4v(OT(o@jfHCP) z$~UVg*aP!MDekh%>JUq0vu;^bu0bM&)m6bq!eLZLdP{dVLRmCGFE}Asg)zy8t zzo!dfU}g>v51+L9{i&imj4=B9ccqUX-+-DgFnu-D*U!w(+BN8az^PbiJW|pUMLJLv ztf+{UK1>k&9#=f)#}5@XHSVe9@$o8~zv+p5BHJ#ONe%M@C1Z+M&1v1U+3{qnro( z`}^zbokr{%_HK@jpwM&k?1oArY~{rDQ$<3T`Q>tBZ0uylJAb;6Pi!xr9SEU;`5X99L zP^7~J0X#=LzGtuxud&6%#JHPEelQ}u6Ho!O^Lxsj9WAUyQ5j7+H)~%4$+XI+P}DJJ z-7JpMx3`N?)KvHiI&X)MI?8mNJwW;_CTU>STPjj79g;!P#YFRhdI4(I)>FurtPyBJ za#ALZ%zJoae44q!tcV_j@xX|~w86OiQ%37}ne8=;>3S`Fo?-7eMHxovvB!RHOhV&v zOF20y(eK@}^A|jEcVr}km*J`Lf@yyyOWeoG3oyLYf)p*+#3dKy{635 zOM8c-Bea_LI!TUHTs65;o?O&}QUmMfCofHVM(NBQzJIrg<6h9suQ}g0{J|j8-_Uvu zSIGU)On7vC(VPmC|dT=rr!Pd%#b)!&`zHh0X=EDbud`xJHQqXnVD4ej~)88*B% zKE(Mi?_CAyZisnWFL@_Um&g+5#5KHf!xDd$G@ZL-7B|m&1mC^)Cs>(})b_Mpj%(!{@!;N~^h9wvGFw{CfqHbyA-rKDu!Re+EKw01*Lz0^=pzHH1k7 z#oiW#UftJ=WH+^ljyY0NQlL8;$dyY5Uu>#cg9kC{1BDUG?QOA7aC;frn{U zoV_O>=g8~5X&)KhgS*mvstLvD`uiYJ1uz-oekAy09TYG~Jj7>LL9oazvi%L%E?u#m z#=i0>6A(WtJ&)gAu9APXf~Es;AC{w^KGaR*p7$sz2|Ta8tet&l0^>NCj}@~(ZOU#w zOr`TGElKrx=IMGghT3M^oP1UL2m-jrFLe01TaPXqm(`=>O>RHT1v?vs<{jz7F5k-A%*J zjtvvd1o?}#H8sr4%rzAi2351*FXM`9f|s^-d`!%?vy!4>Mp3IowpmigEchH&R)y(5 z3ky2ntc?AAOJZVT7Z;bLTsc`;a|;UyXpaTmIB?%SycW$YH@g7SVh{$Yxv_CvVq(E) z^-s~&ogFhPtCCUEdL5RaPBRWda|wy=FVAtt#>T9dTNbm$a*;peDAj0LK}E<`rLZss z!FoKoI#}|1TxC+FZ3L>ax5Lp;Q3!_4D0`vcZKp4R1S@f~Y3XtUX|LcS2H1s=B#l;> z{*m*?C}ex+DD-bb1F~6$!WO1Zi)_M^ms7ab{gJfnejx_? zNN)15=J0kusLQ4b^!E2A$xPzYkcG}%dDVV+4xIWLq(72T-aq!dKB8ZjnA2o43K(5& zEosu6iy$5_E4MNZPXsN1kwG;9ZkOtdV! 
zSCLOGnv=HjuH5TO8bUUWkR`7wyZ)5kSKJaPW}BJg zZ|;L-jB&lDpGLxOLy4Tc(o(ri418=bsia@`h5f9)^S}1v118h<)V<1oCqZCp>ysNk zEhsQi@%y}Yt7a})-@A<4mAz(ekh4(^scsYZIIsN^IcWH6US4v}__u0bi9WN9d{e+x z{aowe&z}ls8Z^!JhN`^1kw+yU#kbSA1)GGeO}Z^cm6|qX(Vdeo&0644DH65yzKx{T z@oODbb{ErV_ln)|%R0;J3=sltVk#jcRkP*oLSQ`VebUa*_ zkzWn--h;2~bA28Fttma(Ot&FDvhzO;Z$#n07Dc;x^pMYR`SBRL4rpQ`nu&wQH`uv-v>2Mz(AKiB| zl(n?nPya}{xLkp-DbV^9`rf5Q7j#EeR8@tBgp8Tj;LyrXjPr-TnFXy=pdkv}hG3wg zqGDp&pi%qC(#1;GXbrBPd|&_%Yd(Ofvq9&veo!}(nEA7njUig&l;;F;I>ymVEoV9v z^8SLr><8}|c0#bBCSXJ-=VT_~v~~4w8uKrIs^gb$XHbD0Y}C-I)4umf@<*q3x%jHF z_`#oXyJEgdht5{wyBwq3Z#A>_3z9qi$r{X{4U*O3r!txI{lkX>h<$W)zKF00WV%aK z7=CWqpdD~DQ^ynOQ2OC*)kE#CLjCRC6C_JcBpP#sQim38wjs`^xl*cGE#jveg~%eHOjft2HXpYa$ZJ?~ecaV-Yt58W5y;rj7StZ7|*&X;xZUEK;_27L^s zI`+?5y{#n?IJ?f$F23;a-)x@z`mPl4t`+tTak$y=Y_HLU-N`At{px0AW1|W&Yo=%m zLrto_`!M@KOzf#Cj&l98?%abpdypC1ajbaeerQu24JyQRB9`8TQZ8|f&%iGHLwg49 z>D-!qV)e#2ss(!5lB=uOGrX(*1>Pq$r~TQ$rBERUSwY9`9xB-e4Z{Ih3J>s$(8`l=DnqZlKS=!Iuk=W#Q=CJfRs>$U+S2mgNEjk)H z33T}-Fqm4i-ZYIQT}m$K$e?aL<#Rrvn*P>y+4Eq2-}y_qIX&ZY-K9;6`A4UR?cnGJ z(Nfjrm7VdRqtLz~fTGgtcAS;_3&qa4a56`XJXNmXMTbd5N-XNrP|-5i;AukRT5tX$ zxlgtK(Z#@%8q55-&ixlsLL$I#!Kb#xMXc=)hUaP#EqZ624ZmI?B!@+IR0*K_J?`V> zc3@E2;WJBnwy!-Al8fMNbLeS8$-gzh+Pb9}k*)96x%86GY|E{b@L`Y`RTLas{*fo7 z`jDezQ}ZmzQKe>{pK-d_u|R;_!&V>f^ZHx+(HO3{;>b;XR+DEak7&!-op8f;k35D6 zL6{GCdU+5;J1g%h<${azD`R?VBU;8bYrAdkp2+c^h;ccD@!o@+F9kS_#!^9%Z*1jR zybb0!+LIk)8ke=3Xxo+k)7aYRZpymJ@FrVIuB@KrFv9x-dPEUSqPscF^%NN!@hA5J zID9>2iGfAqh%Wx@s=XpfZNwqX1fs{mGRfH5S4Xlod@|d#SEuQ0OYBc;;oD+dHL|E> z^i*m0-APsZLC3#t`$g_G&!#>9m?Zcx-VV#)VYlS8>K_*inJ7^I&gi>8$+N*6L8}|d zDQnz3xTc~EKoU`n;gV;M(PCj>2>SZGq%gPoXSLE&W?izkd#>C~TY0?D^NPND&&xaP z=l#Dc^_O8#et+Q*M_QGq>RG7+Gj_=+_q|&4%=lLFp)N-+B>ODo55PIqmhOrb1&(V@ zo`vX~%>RCXEsF#Yr~7z+16q_p;50}B0ZStg9srU(gdfhMHJGf?)cOYo(4Idp0JkMz zh1O`ss#LPEmW*jOsbh_J6; zNA#ORLQue3YGOi#EYiflV6^%J9q#Sj-Jbc{isBPz-1qO;;0_0*9AE?)nwZ2VC26Uv zn^;;7tlF-vuhWN|o}8@OzGY_DTQ2$W;}>YB=HZFY-2tJef3~(98gv?TN)_qoLMm%& zCO>~F&Cj=Wbd;5mSwFi0{|33M3=4aA_eK$l^6KgdM3E{DCNKNi4OPm`HfxM7!CQ1hvyi-vQA*&TPIHk z=@icu5rklGntwY1uxc~)o1(M$QX?8HnrVPCV6{J}i)6pESFj9~oPj3@s(-_A z*<49`%Z*cSFLQqz#5%`$iQ_Srph&wjh*|gKi~vyDmb!pv4LE(bo|a_ritlE;d$P`{Cj60p>{l9+Nbr}2kD^4LdnOQT zRd>gT<5Dbx!%1cBTsWwmty51-kw}kd$^}l$!^7h)Yh}#{2}r4SyVf3$Lpll%$L4tH zC_WGMa*Y=0{E+3nRKWEjqVbtkdQ!+-6=XtDX z<0(Eile9sO>d(AebFQz(yM_^-s$Xf1Uap*Y-Awjkj2%zwKl~J$x&5U-Kghg?Id0PZ z9V_~N-G*f*jRL-$w)Qa6id8O|f46NbDx4~JdwWkfWdPasLVL@ga|xYf)<#kS8_4Bk zTz8L)t7K0^4T(y3-*9w3I~seRI6CcXo}DPt4{Q@{i4RBC<)izUU2H{Ri@5UzoZVfm z&$)b$-EUu~jpSyzwc3hv$}u<1&yGDxbL*~a5jPLq8WERS4DdP9yKAxbeBWbaxSspM zAiL}+#JG<`<~w^$44yhlNUVMe*640mu+Zh@l%t!QCSyf}RRp=%$9*2gPtvL*_lcoY z{XJZSm#1I$9YsFBM%F?SVf^Vw8@7rX>HT!kBW^C@RsG$;_8mp5ZVu=CTCl3&LgC}x z=4|v17cR^}GDQ6_o!{Em;r#Yi8=p@8poZBatIQ5|FFYWCWL3{0v)vOZTw`aRn!h?X zF{{jFL!Ls!IrvJT0u81$a@0GGobV@gzVg`7+Uu>8QGIjNVK}|~Z+T>+K8RA^$CShv z?}+73TTE`dEjtCk%Y6Q)4)v{h9$-#txY|sxV==K$xOP*~HKO#65|dl%Q?|+6{?AL! z#H_9cyicx8ZWT3e3QJQmz7A1AMj}aQDX?8@@`~R5@31!aw-*41)_oDC(LVZK|~VRgm5{M0|P*JcVJ*3 z+=zKiJd18&X=y}cWICGx91Lv4bbEm8s{fePiHcgWM@L}Y0!c+j_-k40$A`S!E*RK%}5gVGCrY9$NL_3MWK?K9??eDTj ze{a|?Hb~3JB!j&jYinzF_gfqVffv}=St<;~I5s?S;5HTPmxd8?NR+7Y82`q=z}P%- z1z&-HK;|_z_#GT@u(tLI{GmWa3j|tRXXPQQT!ED&xFe^DuFznjiSGSW@e&OwTB@av z@~hr^@a@!SfwIF<)5D=X1%WlnB^`TbuH^Am+EpdRd?PD;Q_o-d2ciNr5f#og94n$! 
z4Do+-ehU$OoS!s6%Ni-^(W~RX?o06rJ-o4#^4~f!zuxqv71FsBCt8eZ#z7c;j%Fp* zm*}I^XfTXhjW>OnzPp3;kI^*hhWi6A@j2c_tE!61)cMIpjk7Ih!S7;fpGFrPJBfX9 zbP-6tJacJ=0OFJtE&8NAspqZ3wc!~jdqK2MwukfGp3Sxj|z#ULS$zF=8hY|3%)+jA;3#;oBlp|d)={^8>+dB4-oiMNBc_pH=lJ@mI46!2Is(VD#OjgeaWrQKH{vTya}t%B+u{Ekco#fxzUY+={;7XV~Z+j zY88DMM(&Z>-y@LD=9hS*TUoC!JR*e1S;=@Rb(5u;`~*r&S1USm%ohuWB3qr;&6eLa zDphE1i||Ao&y0F<#kV5?9K8+Nh%v1_Q04b^XG&k1gJ0y}hrVaX=u~iFYIvz$2nPga zZ9A7D$sZ71B0+|RWIyiTSaP`n394rCHgxckbww(l$9dX(!COeiP*LINb&W|JAfh_G*ueR@ZB3N`FHVV!+n6b)0J5?2*s|GI zox0_a{5m=gZ@R{0SnvbAa*^hymmiY`FV}@W0;5Nx{NUt=_t1oAvNib>1mE2Wuz`o) zZVRGiA3Vsdel$mNk1a_OoBf?Ru)YhL>H!ezpZxdIc64+EdqtlrIWKo`6fPGmf*3*AiQfXZD-^<}j4K(kMH@|hH4i|6a{I*s;F#XVj?f~B>LZs zHsU(iJ@KO%?Nw+g<7Jm4U13Y!;o*OqP2d+94_ZxmwUW7&$){hECDnNrXZhHZ(v#?; zK@8G4NTFTU4pd22mY<~hmA-_Jh7WwoR7BMUIMG{z5eX(mGb_D^(Q?W~KlrNan;Y1C zAU^kb+^1!B);4#h%_~-UU4WT}q(TInbuMwFTZU_>){B&$jzuZ3kV1$Um%TgIXqNBe2}0fs3?$`*;;Xp{KAetiN!~OjbLF<@obg(c zWn@xBF16B{t;nGIv`Cg^w~jCLtxE z-?%C6Q0usyu%wl7$S5u^UghMd4iLvfc{`(_$Z3iKaAq~~b*1JDbG02D3;uC&sK&CL z_?$z{u$V^w8&$f0zNsQ08y1{&8b-6#ERtJ*2@4F5=7qAaAGw%t3gc{aOQ20uAGz72 zHjFmaYPp-tFy=dBcsOWl&q3t>uNJ^Cu2Di`1nUQes#RisY7UW!g$%w`h3h0wX!4#beahQGbZ$4P3QCaUKfoMG+FOu;H6Wa=^p^p@Hpg%5QK#w zOW5!0Obl#wgF|H0S!1^H@VB}b!{*}YajzU}k}}#Mza@=r-os`5lLP<#w7;@AgPAFs zRw3j)Wl}D9GpiKpo#48I=ZuN6xud4wutr%~St1U~|Gc*X6P$QCvQOe(^<9&bL&nCI zfGj@2_FB2&`u>BkXE1je=d^^)5)nF})n|VF{i-`Hk_IyJKXU(3)B+XhBzB`WJ9p1$ zsT(mDPiUbv&+fDO)*f{TwC6m9D;IKG>5r(Gls*%e+u!L3OPwYj4R8FJlA44fDtMe} z>PcRWYn##ml>K?ai8w(YPp!zsp_tg{odkfv09LsiGzH5gN%`i z^VWIpWh6E0QGIFCO1q*7+5EMptXx$;d0*dvo*nNGbq2xPu^sKf7RrnmyqtqjZzM%s zZOJNG1UPO6es9Z*2Q&{;zK}RGfT3{NgeSY9~5!HBf+kFi#B_T( zY|rR;V8T$0$8eepSb$FJEQ_gXaFKh9AXHCr2;)i ztIm&HG)}Gt^)I^GjLg$^*Dv{X9zEuh5OmcnYcf~2Y4YAHy~b^C&y9q9YJBCap^j`w zYr+Ac?q|*4?voXZQPOigRa)(pSzYn)KE2ge4n%hEA(1h&aby%LQ7O=Ni&P|vMSW3lcz=JNR*LaY$M-&uPLk#H`8kKfB^Kvn!lZuxXufQie@9AgD;L{DK@o=fLEYsv zj&Pt7Cm_B(ht5CrR3YSg`&z18A=p#^%8GV=>n&(C`)ld^F?7xXH*VxZZU zMCv%Ej4VgE(f;CI@3yM(b!n>)q-cHD@`&N$Co!I_{2u8+Y%7@){iV^x8v;i@L7FnR zUj^MGP7AJ0xia>Y<1%zx88>P7fz+HcaLKvIY9RjP*|pyt3W#s;+bmwJuwHEP?Iy)| z>x_cpU;@Ab?URJqVN~)z6H6fg4FN!;R8Qyn4pq#Hs4lCWgOk0*fhcEoR<1JPO<8(G zvS^4Q+dM`1BffIw1=t zvbK3e(JY@`>UT(SeW=AO@ZNH(Ty6Lb(7m3SzCNr$SQqks8c$zN;mz0)62rL};)l^Wd9nkW#XsIo_{&)eCPfPV0h_B$ z63UY2Jk1=Kw$`s{RqX_fBX`FCSe{uWZCK>Jx6er$HvH6>~AkEW-4Zcxr|&(PY$prqwMIsA+W zvn`?qq7i3|HDnXc89R(ZA_VyHKW+yVtJc6SxHwM)Gp=81W-$8Uxa-8sr~ zOd%gi;E@ujB^G0%{;<9;6Ze-Cb~))>v3M%gh`NNPODS8mSk6ve4L+t;ubXO(-_Eq8 z)()K3r5=@8JXPiWTZkjWJvQHbeeLe1g*|o^UDTyOD&2u0ML|Ing9=e@X4}HpH+sBH z{0Xl2J)<$10jbpSpUM$+lCr`qbq-=UsR`nq?KSfMIeqhMZ)@$-Fv83wzGXRDibO+i zHEs^pmTvL4cX+LgG_*zw(aWq@BrVJpy97kWbDVhvyVp1Qh%!CRO{Z@0^_Dr|Ps)!- z-}!UG^1nW8-7)JvcF(6(Ok@i)P7gA@LQ$*{yGlUA8!w_V-xlH((h7ZwMX)SmMOOvE^3lgrJEs| zY7c(2Z*?>ur^w#G7N}Y!f~*p5M{QC%^Ej&iHt#obLhc-v1BQgHl&$7`zSY5l7#t0V z&U}ku;4z57VUTN4qVP3)O!rCGuQwQr8$2I!AdKmp>FP|(YI5mtCVzqZhovR9s(rZi`j7TP|Fi>_o`Udd;YfxMMMBQt&w7`w6&H1(h9&xw)-!6QzP71h?-AR6 z8Pvmw4!-9xO6DSD+-U&YO$V#oM{*h(XK)3R>?GU8Ki4;+{7cNG2sC z&GL-&s-sHb)H?P9y{1yfptdxypH%$IsO0F6iC&DZLeZOj{?Yl$#?G_MIcD4PsI}Wk zx{s1|uN%pq!@O70;E~{FMJSyGQb z?kmqTHf3LQBz%bS%&BFy+tmjKZC43Kc6v#NED1UW)j_t~vf9%5Nv|2EpgDCuz8#)E z73ELDi1G*nL4&Zie1yg0`F=WMf>8tPlpU%^|H-Y$$ zBqJTX#T$B_8hU~B|5z6PTJj&z@gq7-;kgiH>~FMCXZHCtlk`P0JArNv?hKIyP0zSE zvr&T}$iP>QxKD7CbN4e8>3H1ilj+XFEqb}MupxUP%^wI~ zCSEO1C}F%Ga0gD%omjp^RK6J-i9u(cSzef#NO(oB*lalIG!(z}U3+2vJ4q@D@rh1s z0*w*t4#a%%zHkL0=Gjjv77-Ibiz`}@l$?x?7)#zX@I)zg*Oo4-yy;-J&TnH}Pnh?d z)25WT>omy6X2{bUbobj^VKhrmQ4mPD=^1&2yf%ejcanb$bq4`;x-?}2F%g2#=iPY~ 
z-?Oc%kp0>mZoh{@6J4BDRK`2io{#9i3C}utz>M{rWOUK`v1^b+77k<@onL`wHn*D2bnnomx^9)v1a>EpE)PV ziwsqp(;Ug29Eq8B?PiH4kJZLx zBMQcWs0Rnztu@TU=u@^RzemgEBz}`~nj5;`Kf-@o27WZvhHDOk;T6ux=B zr!{Sa26n!jv}DyB(G}rxI{J<4r>m)%lK4~Quo^ad`5~|X&`q-vBGU;n#CUfO=Hst?fEvf2u|aXSLm*A z=nk)fPUvQpy%RY%(`)E6V!~rm0Xn!q{a2q77GEHT`A)pq(%@H9&j)w&i+wp^0cBp2 z#Zi^ciaJYPLzQ^FBj4=h%a{FTtBh16eB&qkg^+tOq?M)l0Y{J#9In_p?XPVJ0~MiD zpfrcr^GqzKMl$ytQ6JJD5T_UhHT)xZ6EXsTi6H@0cqjJSP1Yj!$}0$2z@L_md7t4L z7WfM#$k{Z{B(x(n=o%aWp#vZsD^gVOJ2Ml9ZO1j)9qNG*7uNyDc^!et@C#rBsyy#5 zy)Q_M0Fv=EI)|92)nwiGI3FjWePZ{!d+F!Mybc(HpdeR9X^<@BoT7Zp2wFng%JrJx;OLJj76VPOm09FPX?RLpCL zR&zhNUrrZ7+Y6f1)MfY;pcEok4;w7~z|z_mk^$?r=@Ue2*UV@3CZD_E8l}yEn}fn) zF2~W7zL%SFMvHNa66EjQQnG|By7&8by=ovDD#M@J3qxtY&c-B3%S7eiaB zLoo|)iL**Gh3>m2$MF~meh*TPnoFgaFM6uPCKD?N-bYcIy=o>RPn8f?aoR0Z&nB7t zm0aJpA4?g))7~e&vb82SFsGW~d$5_=wJzwd|BsUG#xv&)RHB4rhLNcX!~<*Vg1`Ly$F~Y zkNc|~iqeaJE3xduo+m3Qo+g?%LA#kqVU`{`31!~|?It`IX!Uho+}Xa@qM@V_c;8J? zYt`Fs`4ZaJxVG@Acb)$|M_09Az#`kphAZBaA}xGn%Tg~?g_`Cu$hTDHBL$cnjL|sr z7CZ-l+Z2+R5M+a{eVbn%79s##B5&SfNQ2zPKjv{OPvNklte_|ZUU1WxjJz)q`zX{$ z15&qtz#I>?Bi)LQKzHZ(`yfm)vMTsSOfRlr$17tPeh1G*!k%N?)ADip#q}{07)b=- zH{=7Px=y$Z;Zu;2Z}Y1t0j@pWKhnZ{8JCd)kNT?T(!tT zDYBhu_YHx9S0QL%^aS{Lkmbq;ae7iKc3wIV78~YGzDYkmgun@)fhT~hBC8(qjGUuG zBL2FVq$mBtnzmydS75XoEIxEE5P$~aH5PIzm0DQ%Q8TvH)T|XTKXKotAMtbuuMSrO zZUf_HA(#ePf`8GZwxZ_2Q^L#nHlHcfSxznqzqwrTa(I@X&55}fk-mdDtQWXMBmW7n z{x#5q)Nj0>4WVwGv&x31*LHybQ1jq&m-rBhBa~S1x=tv!w_Ub?N~;gTzLV5TEQcwD z;imGDvIP-Rb#<{?#LKIcZkL~sI&%=tO|K6J65^rt2~pxvM8SADDH{fN3+O^~TW{>|BHNEFG{i2P{V z;kWXn`LnavgjR^Ix{|=foL}J5)~`UCL%tLb&yXl_5mHXG(9ghltD4sH&C1=Oz>>ou zRNkxes1mAG$v6ooR<|!$S*ch2Xg+%MT#~<7Wo`6~V}$^aG=?2El6%a&`p9xlGZL8H z5f+B~_QmFho~O(HOEQ!7cT*T6o%aRG}soAscZDmxseJNYfd$FQiTJohq&D`LDvU%^I^l|vp z2cavfns4dH@{4m5bzWJrF+%$u2x7@gYI1JO9OM_Z+yLGehw>d}2=K+a{X`Fd{LZNG zxIYAGL$*S3OfajEwI`Js5YoWPRA|5o5+7idA>fJ01_xxp1$Zl)_$us zWJx&Rpg2rMKkN&T=9OEWp1pKx?$Hr(F>uB>bu)&F`Y(% zGaK}uHg#<`sCJ=K&JBab?JOeo7>m=N3Y#hJ55LLt^m;uc{SaZ+npV|}mJ9qYk|nUc z=t=uEfsE8st7An(^METmP67dcg>|Uf3S(x~ z*|p)#?gB|h;We8a1pwLEqYZqiO&|4%woH3+PXX0ta87MIVKeQ1t#+WxY;iICm}qc@ zNv6bled)K_@|Sxb8Jg6tfVIy}L3LUU{>k*nS?Wn!dzwXc-y2qz6yRgioDL@5*TpY1 zHsL*O_vs2g%c|5+JLyKuDKQg%VLd_mkEQsO9Rq3Ls@cgtA!?Uhr^J?Y3(Bc#o&W%A#YLj$8m= zvTESFB#TSp>lcD|sp+Xd8!ON)hmATSH6FX$i8!49!B!dTlHV5L4s6|RGwu6WP3!rv z7%uvQf^$lQQPhaz&zCVD`dlW{`=kyZ`#pV$tEik+JPVdglRw$EXKRXq&E8V-3W5#h zXlgF`Z(0jlMSp`MQZ^29D?=!o=Ia7yMX_M`JwKo^N8)u)!Njt9B#)dG%&v@Hwl_vl4w~k=b;qkB(Vo=>E|wk#?Vj?}iL>qpGG`(d7QC-* z{>naYOH1G!7}bgF`*c+v4$(3W{yzL4^Yqz1}=?oRm>&VIb-t(pZk$UX=|0VU= z5;8Ukx>ZDgSj5i()u^bn1mOVY#7{!^l|z9Y9X?YT-92Bw<~+AQSh;!#)=2JJmH7+Z z%@O`^QO*Mam?Qf?>^Vis*rq! zPl99dWOl3z`SaAP(FZfzK{fFC8Lcw~>rul#)^)mk6az0WMZgFBnuMaZm>ugS_ zgALtwyCH@As$)3urLeia8(;g1x#tKKW6zIqnt1#gnmt`RmBL|nJ-U1nU?FQxL&e)d z*DoQQNoRVjZH#GT&KLS)>7?{VbMkn9UBY-Y!5JE)N%!?@RC#7NpUx>S2koEJOD9w4 zum+5Y-lQi{)?f=GG;c@qCx=^OAt~!CLq!_ae-`ea1nUkPOsR;|z%2l1UhsU-d9)EY zj1@bjnVR!OkaN7YIM5GfWFvqD^t|A{BSr!xLB~OF0C3)POlM@;e`^6eULsS&=Ysp& zn5Y^Fsu29Z388PV_l($AP=H2MFE~IR0Ln!e@@DF<+n1JduC4l@XqX_>bylyN*Ib!SQa$ZS*7hQ0g0bj&NM zclnA~BXUQ(J{JfCMp;SWR?tMF;p#7owUy$;1>s={NoI#19jRI5%MUMOR0}#c+1|aY zNNQ-~F!C08(B1$`j11ie)YGvRGx4%K{-TtbOC%R~9dh<{{9Xo0GW=|R^75E6sU+(? 
z!gp{U<@v{6`i0vccODrYYmba8!jEC}Ix5 zl=`Gpk(#m$L;ec7yu5Yn?LU1~6lDU=3QAf*|5iP~dP98QQ&r)ufJ(s~N7chlVis?!v@9I?j1aj0sA;kMXTSjU7M7zEQ8u0U%3R?A(-R%tV8#=M6N)Tf zpD%B+tiK=zYR@kado~n3wn61~cA=^eR3f-V8*NiG00*uDASFVKv}x~Wp{n3T6kDgr zt(YP%o{6>;{#WbgT-9D@lm0J5zGCa~TkUAbgz2k%rg&#NFJ^-G8TZ@ql@D=qKe{yZ zm>&aXT0jR$gHztSXoQYk_X4jmGq(3}Qzc31VXj%APSd(Y*hJ$C>^I`RTIlxX(f>a} zkU_>>tJ-8jO&zMTmQJpga)M6MPbi(c2y5^ zt!yRt(${qYDR(pDE*hi*>F0Sf1ar082>(+)@%N;A^&R?mdLF|IrRj7JW78hC9uL*E zSja=CTwWo1?``37@`^$%aN2;>p?v-a5%@CFgcZ6{m#kG8>M!w(MAVk zNsL{N%V6OSh?N%L=1*hoO3R5vtqY8@rIigeB^1?G2r4!hJ|fN6y!zAj7MU*{y3od! z@N16*Es2M~?u#4yl3h?fGhSj{ow|4}*WmcDN{HRx3I{n-P(o83H}xau|J|xZY*^(S z%ebTP}Koj(yxZVGKeEc7lOZo?G*GzO#j?<3f8pknlbZ#p(4EF zXEh%mTS&anh*0%G13@Zaf}3)}+Uxd_(5G=8Js8xVZ&jPm&M|Z*h#100ML-Al0Ai$q zYeWJX$NjWw0Fe_}rG|#|T&(h;zaf#pj_aZc=)W3o+~})EGW2su^(lhLdj;hBHMZ!YjPqc!SmCeCt{%j?2J;1PuKrclu(GN^`OsW z+?I9J=g>VpJSiOa;$M_>(EgJrw)#g5`2X8=FS5vpa{@0LRH?V%H~Bo*a(q%Sz=H+p zh+it$%@{=_k7T)8*1M7nefi-dow$h301o0i1a+JbKYqemsxZw*RI|kN1cHv1XzWw` z+eKyNAd0#va3lWS1~l9LBP}l9LgTlEkc;$gW(jscKx3iSst67;AE>?vw;iz&*4EDq zf&UqV$BOJP34aN~jfLTUwZm5s5EvlL!(#$(#NhJ)+8|WchUd^Ny#WDa_#uE2jNBK5 z4Dm4*H`(Mcf;vN#z}Qp>-(a}HUrE@YGlo&)a)W6q^p)+PZN0Cs2T&oZ5OgcFkMLm^ zrcQ&ociVbfw(57Z-?F2(Rim57jdC3a?>;b?OPVJoc?)StD)_SEyhrh7RROq)E${&a zlvjz9Kg|bW1c0}9cg;;ra|;U*-KJP!=ROL%Zq2Z+=8cVwot>T7*jQ3ths9R!^UIXi z!LhRA8|E83!kZE4#F~Y%1=|~<&|e&;jL5dHx>i%sbVAvyUEP(dBVWIvf73wnB%I6X zIR0@(i$RT+G*j{NK*6>_LSZv1|K|GbPdjY zwsd~h{-_=)Z)r34u)bBmzsfsoeMv${gCyKf3T=gA+yB6m;c#*}l9cF8R0a7p7u~Eqt;8N@j{`QS$u7TuY=raN?;c1tKir(vsWG`Hd}N7*11eu z1M6_Amn8>$`OB6^Z?PkFhZs?$v) z`$yU<9HS*$L)Ud$xfkOcmK$1{At@e0$+veIpy7nd;(2bfcC*0}<$t8nvSEgS0rpds zo|B=b_F}z>#rUZ?KT(Gf_SL+-6EC5}5+Pmznmn5mFSmhDIPVR^N=x^5#&VG{Nj6qi zlr`*K_{hRPypEF=5)y)OjUiIOh-9g#jA8Z6N*O}z7m=v1Hr_I6*=_FFXx^yXt-nSh zD&~0mrs2%F!F*h&&YnGwIYHI>S9(@CX=Pf6VeHe75ES8=$UU3OR+5zZjJaBF$dKUe zbwa<2z?YZU6b8Pt1EosmG}Dqo<^gNzzNA_i^cWc|#$|lorV|!5xbV6$``zoGu6OPe z2OAQ880y;0yZ$b#T#9&Yw-Cr%A`E;ycr;$GKKW$ddI5j0M9fQhGKIVq+f5-*Z~i2{ zdp?LeL{YKm3h}VIHY5*as5x>JZConqEjz7rGa;zHIu1NUa=SiWl18Cg&&1}Tz(O`o zs=m&A*+hTLew@jZ zl2^k>Dd<0LB5f2zv)SL>{d^#*G&T~9^S%T-S6c2*A7eA|XIVp{l|Sq?rRxs819|*1 zKWrrFs(+RpzF|l6$4@K>7^+ytI>psmds9KS(m*7TU(xUenLU&O=N0TN3}43S+am6O zdvG~xA-8|nm><-h3+lOW&j}?JlWQwf0ZTQ>p|^_KN}#;jP| zngQ~Tfr0C=Twl6Ye6+OexM~l^IZOIkUHyaKwE%|Bf)hf)-PgH-!%`N`A zn&)D^m0O;AtBK|W^ zN%Ax4!&J(}l(g79hJgDBqnK3}VizPJbi=5Nx}Q;U@y z9nUbpA_7*!R?86z@k-F&HHCd8mcyyh4=Ze{h`VfA63NB7;7QHhBRZjujWC?3Zw~GH z9qbt#RRcQUE`1#V0Zt857$b#sD0xfnJpXoYK|frz)$;F(=OJ;Q*WokO&c&x8z-$K1AQu#~R3`oS8 z2nIJ@2H+Kz$%4eh{>q%{D;S61(Nt-2wiGBZFz{axMN%KCf9H(z{wZRrP`p<%nSv{rpWvKXCe@s1Lax84{?UV1m-T|>+xE*aw{(LcjoW{Q)} zQ!im~J5s0O@|l3<5*2O!pr2E<&d|@uOij#)qpkW`!)AV_%8|%A^RbiN`Lx)qL^d32 zM7jNx69ShlovT@0R*`Z&RTxj&u(O<71JBCA#UK$^S!U<^cOKpfm8ZgAImGb(3Ipta zF%e6$9KR3ja=`hl2iv%z@Bo(4Qmsn!!6Xv13_;)+5Psh*^eSopMcA4@vAmzf0*eI+zP*31MFx+ zh^k7Pk>Y_@#@0LDZKxXUnu)lGbRbr7?9cxQ=f4t~Vgi$67-~Q$g#`P<Zd_g4pV zW;;RueS|(vx7?JNJ!TM&^^<=SK?bJ&AMSqa9M#Q4t9#~n)}v+%)4=4(8pbD)Tee+i zY6;lcKzZee-8`te=>)2Hb=QW{DsdfWi@gY(0o*vqOyxI`9w5vhiO`IHm*n3WbAzLz zhH@mhuRp@*z$0L-wHU%^|5>wFZ`aU6FiOXZj1cbksNIDY$-n=p`qF4uo*kx@o{(`k zuftGzl-{uAvT|GWAwRoU9QNMWGv5&oF|8)e+ThJ9L?>hixMDG1W2{*CvLb2p2=EA4 z1!5{b67t_U!p0z;3e!~|UbA1&$x_EjTMVc1|2;<78~)kOJ3GrY5#^kCIzYw9zkaWEe)RLJ%6C*;JQp~BSHKho(Jf6ix#!k=rmjg}N=yZ~KOB-32Yz-(9k9-5CY(3-_btuMBtH1Z z=<6QB@<$c^_@$t*5$;O+`3>>w9-7TLm6holsZ^xOrOG7dvt9PGcJH}&1M-WPj9M%o z%#%-jgG-*VYnuB z=~^MA^{T39b4~gFHgMYXZeQ{5K9d|8JpVl7W+Qe2&cN}{^!C_Hhz|fe6}$szJ;*DI z-{~a^B1zlgq$(9agaLd()CU1gV%7bcJfJqf77#;oBKoeY*`bk>7tjD_78C-x4&fFD 
z(YAFa|I{=9hlu$@$c%&BR>;_{HU(Z#p*X<>AYQn()FQ)h36p@fAR351g!}~YJp)Ij zSQaH4{;ZY+I!s;vM}xiZ)3dtR84IO&@7>$vwyP>0v(}cSAh*SHR}{_0&Nw*t>k~G3 z5D%^?|G4}8$NBR?)`Mr1R)Q;^>iQ} z@fzVhr6t^GUR&XH`c{>7wqOSq_XXQBfsV&|yWXuo=W0BjtH5F86ktruxxZy5x-;G4 zOiqi=8N~BuZ!90``Rq$~IV6?9h-lwZIXl(!6%oi+CkSNSJDc9ua-hd(lhpvTbKCOH z#kd|$cMl%_a$^WNZgUyFi4GZ2N3%9{naw^AyQ0~$Eh)p^*5S0k8s_J8n5cGgkhtP> zxjg1Ew(^%Et>EEgUfa2SOf_>=4+$|v&kSGk^>D4O!FkOqxNfa~6TaVjmAswad2Tc5 zWarz!Gi<7ds`Vq$ecR&XTkdIG%2`5ERN|}>>6}GbOffya^XjUtCo=NKm#%e@8WRlTWVC&|3n*jnNwef>?t{8}F66*mE`D5J0vb=<6;o)eLk`p?PW-$fsG z&y{2d;IeU;A{7qxEF4Q#Kaht3p{#jrsHFt~O)2>cx! z1{EVyIr;(O0eD|bXa&v<{tBKsXaQoDC9+bSywZdWf%8SrbO4xwaKReR29(GmzzeV% z7|RLSCovbgC06rd>)WZziX&F6PGn(-j>A_#yXoeCa$5$Ex7-Z<#{09 z={W6qRyN%HMd~^R$(-|>&yp^to)&b@ZuRW2c=OATG9ksSC_TdWTm^OIB=wF$9;Job zS}pZyedT^F8l6nO`<)}#Vg{>376kA2c_XR!jxnFH z3Xv+5zI+^@VZs*pta^5-iij$(IYNiX z#k4htnd!tH)h0=8SJeMZojLk|&&3ks-ve*_82&ujk0t-XY7|P(52q%0EwKD1rTfS$ zt*j(z1ZLlObMl@2rZowFo3uA>>R=GbenC}AIrvgqehm=VFVQjn@77K2 zrelGh;BesNz;6L?W)QK@yEai!9asSp{EN8|d<2n#V68B$VETdN8JR9Y8M_j%o>71u zHGSZMR!oVC#0ox-z~P~a?9m6lM((@Z??KS$V??Q%Pe-CosUSpbMSE3EYK3KVcas1P z90uleI@&yZAqIm$MffcyPK698?{C_44@(;{#KO-eItu((qy4>VB6RERc3~hyJ1Pkg zpFfKaCNam5@f+S2Xn*;TLGD{?zqz`)WopUHj(g@lpl_|KsjsenvqVa$7*@~`Lr<)6 zC}K0aSUwBguCw8kxre1a+U$bS9n)eM^KljLDWCZ-$${bcC?iEfyzlSZE(6O%AiPkA(==J=yjD$S_x-NTo|-}bOnN^`y61^ykUA8 zXQCqlMPh1zE_<#aCaHMbT@{Xz>mS%N$p@3@=>M5Y-w#+KDF*<8ocUZ2O0rHs*+`Gi z#u@yTy+=-Tn*l}W(+0G2X<72Mu`B0VSLNYi zjkT5+Mf{Z6#9X)zV`!ke-XNfiHt$#OUOtg&pQP`X`oht2CRL=#d3=*mXKDT{g6E8T z#KDow`DAN~j2W;ZPHuswrHx(GP#m>*N@__tvCwWC_MO(v0fT(Hfj=+TdC&!rjRag+ zM@CCEiq^(PHuE*C>Jl9{?c;Ep4+k*$;a4WFP@WGdc}PcFJ1M2!pJ5V^m&SVqu$9!0 z#h>rf7=~-XKlt{O7GRJr)=fbLS$kd8m+b{T*%arR9Hv-*gs9B>l_Hs1^DpB?Y7#W= zk1Bbpsi7MhZ7uhzg|VF4K2{GN-HssSCnwD{h8x9)yK{M7kv*KGAwiC)pSn*m3+tNv zpVs8276ltf@-Q*I);Jp(0^WZ7nPKpJsncm@jy-r~U_Vi5C{kZY=a)VfF5D7TG=$Ax zZ+Dg+`RkbQ!DqU_8~+h@JsguH#YGyrIfrCoDGLi;%84;hUYIETJr4HEyMUDvRSjDX zA>{oEXWO_17~_WAO0{v5VibM{;Q*M1Uk7`HNYsGjsH*%Pui*J09X=|3qjKa1vK>-E z3U6zFsPFz&^IDe__i#c~Agh~9Nx!eQKhSqF&wm1F!>_&!@Ur>eg1x)1|C&m$Z>n?u zL&L#Mf;sq;Nsn9Rd*6Me2~nttmr#Z>-G&RJTe2lg*&Nr}Obv{R7tLBR5(DMybCdg= z-GLvUrxj_Q>D@kimJ(1#5?|(X@b8?6E(C}M@OD?;Uc2rq)=|y3M8gvhn*~ZVA*Rw>ILMP=yZ+KlC4oXEtd@ts!giZ{Q z~&n9_3l+%~q}3ddhF?LNEb$uC+AJj`{B$RFOVbBBE0 zpUunH`*kNWF-)@`nf@V#tVv>|sdRgqzinlovf}p%$<+^773;%6;ly*~MB=R3yZgZ~ ztC&Y2i_7%yWN)qoBu6ISvc8v*nz41m6yy82WD|HAl1Urr@>b|G`{j>nN1d*DKQYlH zqoKM}hmE8vi$1O7jJ_Mi%g{>0yG!hTGf{zW{Zkx@2JK5+GBuaI;c7<~KV~RTyK)@0 zECcoe89txy*)U=S+^$lpxMYV*Rc5Pg2Xrj@99?jG9vS-Can*YZ9~~-_15Ng3MiIz_ zM_U~d#K&A@G3{nOXsn8NQm5{`ZHGbzA9w2datigYc6YOoCrEjV4dg!W*pR>K>ggq0 zB%gR-9KrIJxPD^!BB+^Ke?}@${=>-M7WYi;ID}Hi`Bu|Q1*1fFalrBSxvGYPuH;3& zpl|YrkL_NT)(7|XQiRA~Lgj6aG~GdKzw0JW zj5>6QRvc^21jh|z=r^B_D0Q;$B1+K1u-)$FMFXLEQtlSDSLJ4%aTbg7PHxxB7Y%#4 zY4H@ktok!$bs>q&5rr6lgE%y3V*Zw=mSc0vQaAv=admN1Zv;ALkr{0yWccz%?$c7( zkkA4DeBKQyDK@##tI|mn09N;KxisXCdVkzYSe7BM-xE#?@1IUuyJ8SMbGoOkQ!}0= zNNTbbG-BT=dDPRVm(tvqJ}P#VM~FcMagfxnM8n4Y(D+O zmQuj;)U~?prgjGgv;W)7L}|UWsL zF#{^39n>mF#@VHw)8lFP3)y2#IOhM>0?6(1!boU-`^g%A^`zCR=h5un3#{82g`xXZ!GAasF z_%$q;(d6gvk0~6yN6I{5HIUO3FDQTtu>HIl6`VOH$_iTLs z=}#)$`Wb6jboH*N57Bnj^Bu$q_#*P~nDJyOdbE2brCd@ihGMO0T0*iikdwnmTwW+E z7so&>hSzqy(AC9qaj?;Y`t@!^!~HfV+6c3hy!PYGm=l56)uvH^qwq?vV*2e23_T&i zo1Wf=nJQusUOrS|H>35iW@!5x@rm>GG1T*NcDg0~@lH6r^_!0$)efJ#_R@){;Iq2~ z8gLtvBWDnN@}lCNb60D%f)Z80pFKzAqTXR2UJYQ2&EH@!QMEfCXpJoNwsD$t#`dTb zT-tBgii(U*(~@cHjk#=o-rm8yHYdaSnC+WPxqIeWIK5REEo)>imKaa3!J)hra(M3^ zH)_5g8F%>nrO|Em{Y}~`c?uO53gAZ~*oz1q^iN$D{8{fRDF~f!*!xsAGm}5V)lh|) 
zH@90!ZCi}BR_5z$9a-oKFNl?~m-WKaPIPh4Iq5xGGcc+;Mwuz1IGfJZP1ib1|hoc$HF&U5R z@n&VdkJuf%r}=DwW07BkW~mQE?)TkUKGcO)?)34yxIusDlN0sjIgd!aRfb|d`MX4l zQSJWBQ)<&n9jYM{xESm-y2T-p*C7#xBJ@~H7=jNU>eSyG%j?0XEoosmJq>*LFT=wGeT zQN*QRz7EshF)N&M84OHTdp01ca_jsQDo{)(h<+HPPsC|oL%r*@4wwlbn|h;jwpm}Ui82=Pj*d1CyY z96OT9q>|5AfAfyngL-pO7$8Cs0-{0vU@Ri6N%5vWKnj2uK;XqpzIdA=N})qgvhXnf zwK9m*4GlANA2m1EUptZ}%69)O(`Mqi5Q9Z&LtR{5IWKTZ%xam;bUpvwq$D=$(ir87zC9MMeq#rPSpzx?&a5`STJ*Lq`+CmIQVisFhnhI}gyF zWSWINHIW#Yd$ZNipEePABw<862NN=qx2DUN8+5cSsot9kHQLhL9Gi-5tRMB*pLDZ^ z(Gb{kjZ=%(pnPd^@=oZvdUiwZ`2l3T=ohW>Qapd696AZnyE2$r_;slqKV6?rI|+XdG;(knO?uZy&@XKsJstI{0D-R7@*7mK-- z?Eo(1Q|$U7>~Kx^>u3jOH$A>=d#RBx>$qeRZsWODN!7RmohNJY6lwn4)@+Q45{BR2 zQCsQ%u{9ZYw-a~v>no`)eB%O23yc-?ehMaNn@4oyH^Lueid^)kGwy&J6^S=+ygNpY z68N4LkQPCrNae!Qpvox}9(4~MHV~>!SuinCMFOvfk^2pCRLCzF;!Kcv@n4sglMBXF zxSDs?XtS%Ql6L5W0i8kD~QRkan_%ndgH+3K z=Dw;&Z%$VFC~;t=H(vzkRuEkESQw)R)Ncsh4Lf9GY2#ffTaIrL_yZnAJNi#}u~-1c?v|4&!wmy(g8e#! zWf-KZVT5C_>zG$WH;LW-)~?s)`R)bXS`4e+-QZDQtE#JLp1x@P-CZYqmnCz?#7x_1 zQKmxoi z9{YcGlqb!b(MKpJ8W28ko3i*TY5Ox9>;-H-y+wBHruI zG@^@vqj^nAPi@D)75B57FMX935BB6#E^#w{ zwedA;YSMmHFZq09yGlKDkPx)OVI9+DHkT1)6ESxo6dEL(`w=0!+$L4uKjnTe5#41y zTI7;?@4n^Jr3)?R+-YTN6o1Oj$+;!>%Z^MMsQ}iy-E;9-p-oA*9?L@^(uioO)}A(@ z%W3G?P%aCvtEJWCxlwA_ix{D=&t(ovnwpE}^dY#r^sBsRKC64CFD4SJYH(fS^t1=W z7%&A%-(&L^P#aeQ0;QaYeT}+jfROx9$m? zWpnW-MJ%1NY1M;Fs7yf5IA(yJ8;*k!y4-khsns&{|t=^(nHZiwqBb(Q(^= zX;)4`bsVR&+Y21D9yT*g4udKb%u?<%1wBX=d`<_Z0f>de`y6HKVFbIrTF3^>TeMr_ za?cTr8*sWX(&w8=9*)fgFBErv0q_W(aOP*PNjA#uUP2m?Ee$?Y_57mTfW?RX8pXDI z-jm(!gNe!|9B zPm4S-3}O#y34_NB3IJoxYUmRZgRw`E`3f3NQAR0=(8`ch(Rs1x<*8uau`8HA@w?;h zV-IK)-82JkmQYe6xk|D737v@=*Y-HN#2=I8Yn!Wdq zy)Go66L9a##fX0cp{f%90Irt!2?i&G$>zU63(;q3I8(rV-zT_rI1u;jllu}|yKTIq zJvedc+5c?OpkZng-Dgd7uEAw3_vCGAyUpS9a|<+HgtCV9uI`&&yKrR25w@;}&TCC- ze!l}mV5Pt?&~II5bxyyI2gaY8Q=5=@>+&{I0a5JT>>DHUTRtq|RF9XGn0<^DBD6=7 z{ZPe2)U*c2%V5KMrRT&_<&vnIp8Sv(TA^&at9aeo9TpB=S7jF|82GPLoPRCblb|Xs zY9{f@Dt^B>DK=Gy7nP&#_~Opuid&Lr8=(3z$0s4R@~xp@q!5F^P_x7v-sQ(4G-{rt z2)+9zy4;vx0B1W*#A|hM(;u0n^dV)pq5^$F^MNHkG|pW3sAbtf=~|&x!^VpaJ+A1w zM`B67s~!`^>LNE4>OS z=8}we+U&K(Wk4$N)3I~0tNpoh*o8*DwZ$~9#u-r&ro3&DXEnC?;mp(^kDv8Z)6^LT z;)j9)3#N-`OYe51(XBy-lO%OpZs4Nw&^Sa0AW1ARW|G%ji`r~)WC|XqR_?ZEVZ(Bu9anmY;u%XLd*DYAww8{HZeB<8->mw z%e4u+=0dmQZ$*~!bIG0R>~#nIS8w(kC|9R`7Ek;P7{u1&O%)<3E^Q$5_f_5ePfob+ zJ{!q^&ET>g#9++ggu@3XkFepBj}E4WP@*6`fw16Wf<(ddKJF~=Jz~Bqs7^(VEQ)X# zC!|&Avsf5#fE{QU&OwX~2OtBEULp}B*Fa}rLEil!`7A*klr)4S(48kelYjXUd_4r- zn9%8kHWoFk$bWx|AN>Oy+RFG~ZVLJb_(L!$#Kage3%*{{&m$c~e&U#~75?wJInzxT zID?9$$1;e8k-ZAWkLdDF9o}j0v8oX&cwnph{5D}MP$8vd-y`HmxKm9?faCdGlwza$ zThBV-uvMS0T2K==vQ$P&G7U2tcxt}>>R@_!u{0t~%3)y2wI4S;AK9i`G+f0iU=%v| zjkc=Vc6Rowf*`C)!#cHcE9P+B;??S@g8jayTa5*%Xo&RwwlTbE^wf^TtN91r%ck4N zIF1YF9P4vK;dJ@nNc;6WMH&r3!8hIA;6tr{c6mMXrZMnfiA}7c7Lj)PB6sa(R443{^^f>{GFwu$}o-{bt?p) zZ5x(as8*!(MejnWzVrU;Y^ME~6AE?cPr-a?lF^4MgI-^6x*`z0?5mSCtciK=-t+JV zhXTRG9F$-F{g&q@BP=E|D_e{6uJiM*>>*$u>kWP5GVeJF^2M@8z3&(c7ZiXu2EidM zt9tLmB67df*eth>wq-xv3*>%4yY=O^ME&yplPkqpXAa6$KH&<V3{`#{Ny2z(7JYG>#``*2f+`jd{Dg)Xli_hTq(uX?Dz#YCeL; zIq)8QA|r*$IqV}TB4SP2@w>}h2%Sx)B6Yl5(n-sfa;h^RTI{SR4Yofm2OBZc;V3b& zsA;gXWAnG^>EA6depeUl3^I8@?49rZiIm8VfIOrsNxV;n-0H2Vx2tD)tY@j;i_pPV z&tQig>pdNtD<&C&+|XjV*f{Qy9gwsi45>>R0t9`7#p1GGvapQ9!FpF|5H7K zWlHhh!W|a}EB&_;n<8=*z9;*M1%=2C;Kg9oWJ+kVBZ z9__v|S=EHJpXEzF?kDGxddJydslCxNQPv5Pp4;Vg!sB#~sZSNkP%`X8&{1u?SELOL zDe$>D@`TnKj=XylO@vcrFxc5p)Z$M9D?GR>v$s1!z~(d**NE<`oP_RbbG2nL&;tjT zly~;z1yAO|YqgmEQceGZjET~4F8(fia%cv+P2-0xo&c%2rz*yV;Eu&ry#7qPW#KR) zPU@9jA)Q@;8SPG^gh 
zg~Z_4xL>iNd4;k|vrDSZN~3TYTggxJhcOdoXn%YxzFYi>Wp_j*Z>pzZrapXbDA9gw zgtfdiRw3hY6}GLKl4nc^F#c$;?s#?kU+R)8>EDYE&>GBIoTeMrzUJZSJ0CeUt^4?j z=PmJT4ZPgzW_(pGyMy}A42kYVh84bz zof#@`aU)*X+$e&}R>7L;ZoOIatZ5q#rzZAz5+ea2gH~?A&-F)GD=BQ|vI29HL)9&G z?JcB%Qb7`p)u6=Iry0s^Am3**ozIPA!YgQ){S4 zuW`ov;)~;s&xe>S9Tjx58?k0~3k?Q3E;z!%m$}X|&~5L_olK^Z>xs+JXCxd%P_&!= zNoaf3HTI3Lqp}+lC-2zfd||#v%V%p$_~8BL7Y&U2&5$g)rG@m`cW}?O_v(_0_^_bK zCZ;%#8?}>)9i&-^mapn?Q7>b-BSLu5&`?l=oe22cLGKTm0jD$Z>@v-J`X=&i#ZNVL zvw3|MAo{$Ik&2Nzs=s4@=fVtmrX9|zdNFKQ4i1?(U^Ls;%PWpG|DMMkm@N8)tVm_P zePV)V%Z0(O{A_hI9O(oVksCse!LLQ%MaMv`8cNkm(%UllXi&4l$?BkkK@y4dK3kcO zPE^_JNJQkJ>ltrK-7k*WURObSv2AlgsrTer$>agUfr|z z-}3Zx$4o}5)1g7m3Sj1l&YO1X>|W#Q_kA2Fpya%prC&2>XJ2w)LV?%B zRjcSM4+~5i+dM@Y%e*Y#vXhHJMY7Y?+N7Ro7ZVf`g$K4cZn(J#RS%xO|1Ise%&7h% zYDwZ~^KgHoHXPe`pbnyD7jd$xEgAm&@m)GnDkmKq>8YAv(&mj7?Z1rC)fHDoe9~!UcYEWF{Ykz9c`KME$ujA-!eYQd%1ZV80h_P@)HJUXqcgxJ%Cu!K`L~UMb zD$zY_HhA-qYz!F^JU7Cek3xw`(9_3|rt&D-cb`Qjq#8-Phf{eIQAb6l!jsgkgK7FVJQZAcgdNH&?Jw%{jbc>G|Lw zm=~;499wTa8e6_66pt0xeMx9}IKu{W+#TNQ)6o_b9nX{@|9MSvK$f>W$Iv|Nv0CPY zbZpw{Ycp!yV4DkMa4U=6tzxpGKs8rp4YdoyN;t>Dk#S*5;@|<)u~=_c#|VC3`L=@4 zf~(x>0iQnU=X9SQ;`_Vhy7Ac%PrfI=MjKAKqwPi4isIeV+-Q}-^zJ18!bnjzXEeC8 zIMj)ma~Nf1$g0GLR)o5z(e(Nkxt88Cy%!P3X&JPWU?o2dSzq7Ig05c_NY*PsI}Kjr zy(%fTJ7?}3bUHs|E%wh3{Om_VmmH{5G)BD|tHv?hF+9FFJszVCglQm>lB_99)l$5q zw7weyXOzv@oYRrQiwXs-NR1BQBs4WY)m88HX|>zb9cQq{nY25oC)ahB6c6nBJ+cv9 zdlZwMo@QIhFz3d%nf^3bt_iZhIX^p|PLplDCysRIyDwLrGy4h;HNwNssU?_R7@$6Y zCRv#KfH9qkyH{Nq-bPy9q0$eaHeGGMQaBx>7C6b%aVW~3DNe_7G?LMjE)2{Fw^-6& zU4NsXf4V0AZD9PoB-OLIae)dWoa7hW;8QSgM;6vYK8enB84Lv#YzyzLHIQMm>TA_8 zGkq=8a}n89zC|GjCMEIzLa9^!V)3N?L|ZyTTtZlh}Yx(p>?`-%aw>i zQrmQM-?hRJXL11}vDO;5(bfuNuwKoc+|}teZroPW^GWh@W{H?;MNSGl9#@)#`W-G; zG0x%aG`w_dr_JO>%2{oQPhKtx_x zi7?PtG1-cs;Z1{_1`n$KY577cVa?@|q7o=}H7Nib3_hZPXd1WGqhm^a%-7?cSdIT z`&L)@Zt9&m|EJ?Rebq0;d-kGRy9xws2o0A^2*$&|W(86n&N9;@*&thwttRda@(a;S zdoK_Q_R8Kbc4e=u^!s(D=xx-SjY)2-f*`|v5vVjA#04aMC1M;e_Ayg^r3fDH$Mbr% z)ZF&Rdx{!geej$*b!`UWmJgV1t=W66UV)vldY|5t7mm}H42Z%{}4Gjws{!UiArC9$Mt%YZnt6hz4r4W0xWNyj*fpAi(ui?r_)7!D;ZK?D(OD*QsrhIi5)Bh*^#O z=z53$MyOYx3TJF_H&i+b0j1+qjgokVnn9w8r zmi3MK@29<_)vql&4owN=&XpfxcohQuFBgENwx-zVVDpQiU#I+)&h)x^^8*V#l}pyB zSXhvx5}w=iKmS*57RdD9dhQ2nS)iW_csyN=@mVi7OpF`+eUnvDs zV-rI&@j_%93$(`#F85?M{O4i8Pj3;y#T@*ZJs&QQRMH^xZITmG5`CSe^c6@EV9Ik&lD=;A0F2ThhRsONfoJ-6Q_@%AtF9P zpUwai^lK`IlSRkQpKpkXi5U~W1N6nEdTTmLJ3G51R!eo@7pSSJKa!FH_Oq@}w z5|Wc)e-9V}mS^732NPZ046kHvV!#sJ6KHS)`kQv?+98wKqG%vQ|Bnm|7wdogl36Wb zWNTVG5*Q4A`1yhFA0A4znw6=s*(?`pGWvUajR7~LQ5#CiMnEio}K z;HQ45DF%%B8IpksPRRICeQtJB0%fQ3Sz{SCDqSOEGKQVQ5`zp|d*)~g8|z1S#9EOz zAP<2Fkv1|O20ww5OETY4aVaJ2up}x>+dur7@(hn0Wk3U7m^{S}KtdNDAdPRN1vsax z7-mPa(rS13PXlMv3l4qbeqVqT<3)+3lN*EMewUi$mp$-JI(~^WArbdO{`nIUtfRIu zKFIKs@%$6Z<@GF2OMD6awdz0VNc?tJT1`b0_x$gLzpk&Go3J-H$Om5*_M+qCSG3rF zgZo?924MMqdg`^&=?{#y^Wn4^@a!=caP1w6r*j6psO^E}k)O=vY;`nS8X^t{BhVL4 zM8fT21N5dm-W-){)Q9)Edlgn_(gFkj)Dwc!+S20ne4`R1l(lvYjQP=A`O8sxJGLc< z%lUdFsp#;?h{XLfd080+2x3OGo+UGKlc z#cePiNs@@CHDgX%^Li?kN$2(Q@(R$_`a2yT1)j>49e};Fzga1e7fF8c#&I`JK5-?lBp9!%O)UzGHDVPKQ}i>3l`B%8o4~w81-{Tp%&YL}BQMKkFAJztaVPM)=Rt zDwi)-q9$Fp4Qba_jEzkjVyLdfBydak&$25N!^=qZxdMUs_z{n3m%Y&U-{io|fl=09 zH{iJy{PP;%8|7$EkI`z2D`36{qkvnfd?6;kmDTCx&Jg%l&*o?<*V59` zkRhFZMF0wc)7|MxzDzo{=(xC=e}uszpkAM!^ezqV1!TM>}{o4I}_DG4=b|FhmfEuf<}EZ5oBJEyM0Bj*y(R{EL7;wc6G^-K**ST zFi_V;#}uUcW6aT%pMD6i_Mp7=!pT2NGp|A!55>^yhduw@W4ArKa1im7bLE(NaO~MZ z4Ui;Eg}+^cZ!bw$$Y8Hcd~h3(KsE?zSXSw;91u=nPWcfskk;Sa6S&psgz%Pi9%c(?>vejlou#)O`TpTM9 
z5S6=(4P_4j|vXg->@H6M{B|LA;PA3A@w{4^Nac#|+w zr*|@Tw{aPzIIhF}n~js%e(r7+p)?hsMO5|WB%JSEP*bF26FSb@-?v}f_(28xjjt); z<;GyVMW?J9##BKVftFnZW$Y#y#bI}D>id9M7s5|%_l09uGZl3q^u`vQNO<0+dY*YD z4b&(_q5GTJUNEUkHaWdaAgc4xu9Bp<1RbZa`~JA+q5<5V<;@1i1c*kEm)qV?)0c@W zda?L`FO0O<%xu5-$hKs-sr&3L+b(~9g+aKt7O0SS%XTCnUp>5JGg;bjkkH{O%6}OX z9=~Z+GYE|0yQJM5svya4+Hko?VW<~KXy?)9=rf-=17AN0%(2a(%1Y@6Uhev5uesU@ z$tNxQr#08tv+^jVNEHhAUJ|nSf^J#Y+>r zp4gqn`B;Y09^x}nhu?@U)?4LyP?%0R4O}wYnjp2VTH852-9=Ubj+gQB;z2YA#a$}s z67}}=_59{5j&+}SFfg9Yy>o22@%#cQfI&DgtoAm?WTp86Z6^DDH6!N^4#76#zhSAL zLfND*)n`IlWw}G%1^9TK+i2i)n^65cWAi!+Qr07W;mP(>+~}sxhO`1bx+$KOzm`_t z52O`e%;nA~k%${(!&4-ZUE5MPHx*Z>gD@+rtKr4pdtsY|gl-7W?BGLu%vM}L^F z=-5~O2&DZtKeax`bKNwD1})KU^@@LOY$^u(Dr?f@-g9(+A=W7$O_HQB6uLfL=$}fEaZGMJ zpYwnIF^kqkGK;o6y=p!m?o5n3$J~WYu`%9n3BGIJ39K)a`Q+8sgh)Y-(0y78zA#r) zRhT-1vQHr*x$;x)JnysRaH!NJ;oDN*so+K);KO9I=-7XXu@6TN6)Z{987=hKP>u7pT;~8oLu~cvR77XB9#v1yvnD=qK(+R3 z!TIFm3lwhH;SMf|x5>0sTDAMzou%ek&$A~RB?=WncCZ9a$SM

RXH-P==5FUDmqgeZrLapE>`vHTxVy?FuRIT` zVAj?FX(-y1(_M(vn@LMgPqJgVc;ObmnY(Xi8-R-BoGxDnBi|Wee6B@yqa~CrDW3*@ zW75-;81`8WvtYsLzx=jaH5RCPQ>Nhww!V0!`34pAXQ|xs&t3<3)M0n~N`81~nw>`_ zA0s`PPHas4zQ;I0*VD@D>~kz5SN}+3Cf|$q+bw&3Hk4Vjw!7JY==cR#6Ya4Gzv1XF zzrO_2t%T}AAYDUMa{TU+5YuNC2a-1QtV;X6H3iK~7%4!i-BF|?r{3Zw!FOLFY~RY2 z(EV^>^z!Q3vChgQ&3ehacS~&}eP5A&{#>vO^u`^1d+I}5b8X&mHyP)YnaElO~&KG=}M;~AAb|E&*a;vn)O*lCixb7 zC{j?%;+?$eij1(HpeK2n^{nUB8#3pP%)+zBVfMf|XCm^dyzX*gOPTDUOKQIa`nyX) z`bqg!I$*bT&^IZ!ch3ngcBiUdT9y7tCbqzG2VOvR89z0ssSNR=A0|h>)t!5SH?1d9 z#K*L}D|$L2yj^Q7IL3}D`lzYZ^0uMHK~a&an5w~M&U)atP&umH8^w>JSD48@bwt=Q{7c@ly(wR)}>JyErncs)lWH`HUbUvOBeURE6r`u@sJ zcDDH|4VGwt$s_%;_0TPfVcv&PjQtM_fX8Z?Ezz%W4k51C>gKYWFFtPwwAOhV0OJ5~ zkwR{l&hkPDn$2htA7IaEZ5j-vwZw~(<;hD&eJ~s{ZnF}K3i{)0%f(yU zr%pk~OLfX-JSy9)ugGHWd2-vG^`ZH9SfI@Nw|8U*p#H=qbU+-J2<`P^ClibSEI$V) zMC~tnbO!qUnJd$7Cr65G3eSUM;jOUL+rQ1E3_fwASv-UgBM9*o4Jo6dq`#907{$QR zpJ|b{KU=V^4l(iZ$lfFE!(24HFyJqkPdtlS%({nW?KT{RebH?LhZ48FNU&QVb&$aayv7HLSP~3yOH`;QS_i9gfWF4%k0$q25S`x=j7uTh&4~Oo!7vD}c zjF%sO?a*rttp%5Rd)^vxNAh3NlCiVpcco6uWsP6K_8luT(9)D&+J*E^CkUtRQ4v+1zK<>9z5@0k|k*077iOY*tZgD%LkR5HnYQ; z0Aka0b#n&=H&BGtFiZShFZY#4%Xt%!rFC@wIceWst;@RHbG9E{t?s62c)L#AUpz>Z z)+Z!QUCuubwLK3ufzrZ1Mzx-u8W%YDosWeY`6vmWUOxKdsTUy3D2&pRT)r;3_HvYd zrKHhqtls;WTnV+IXu9}ljZV<)fRAK+jCry6UY?d47x zkxD?6KIz%<&lTH;eW3ua?Y;R~3}dm~5#bu~Y7|u*k++Mdp4-`pJ5<6V#$?4P>7Rtp z`&dAFF%tu~FIq7w7}p*#GHS*5RL#P}q8={l%Pk%)QV>bEv2f(T(MA>c2lLC9iZr_T zAHe6|64-RcFHPG~>3oh_T%+X?aNi#={(sSN${!n6HyK3^)SQi`W)53dt-3@DQ2SNU z?#(sx6Jt-qe%_llx4`{-fmNQg$7l700u40K2mW#VySpGTu5={wWOI>y3LZA%iU}7H zXBSM$fM_HMbEqcoHu#q-?cDLgu#Klz1J(e%sI2ft3(l%*zaOqRRZmA1H5nA`yut?> zo_Q0%-PhCh;y&d)LKlQJuc>Aau9diR+1S7k?Bmfkkj#uI%x_z4cE+W7`bi{~y@jdG z2}-oloCbcEiA|91ClDHb{06tEQIxahakgWD;vguW$_8z;jGDQ&`~&6JT^7emM?WYf z*jUIlGWWw-R^YnZ(dB}V9HRZ7yTv0y;j6h&dTjFs_~J8)9rl0tql?-Bdg zwtP#prHgLgfIL*4LSY&8Rrw&5b3x;dN`6?*Oz!;a1*WrHhIAb@Z=UjSkI8Uiw}Td> zW5rN=K>1Z#vEdQEz zO|NzRWFQ!opMxK;Ew-7MYVY$a8!;jgI>^1<1BW7;l4K)@y{JeEZ{b_m9h-c6v-bpY z3^JV4&ZjlHW^Hjvx8=qwhhCK5)>aFqnb`uSwhn3ygVxKhjJFO*G@jFF0$pZLZD`Pl z0JD9!RZezNb{u=GAZe}Hn=VGrHD)%i;Bs%_-(1l^iMDi-n;WJc)pK|(QeYfikgvgE zkPu2D@VRcX3cMbDO6WS_{hlsDA?D7T0HuU01Jz8jv;9ru3}nz(L&9dC;lC9^HT(J; zD-Ff_M&|ZX>)sFF+}b)@e#1Pg#7*2~79_SAg8xnNoh%?#)joLAA#6H_=Q^1g=iRF| z?NEOY3I{H1ESV$+B_8#e{aa>q<5N+5S}} zVD|D(a(H{-`mK#R(pvb8rll_6=3)VUK?plhSdYS^9-)85I)pU4-RK3E?c-eK5R+$0 z1MLrFQwv!5E)<-4`{QVqTBrdJ?cTo>5MMXolM42SDmd{(^mKaP_B~oQs9Gsn^(`H) zit@O-c@e#j7~h7KO-3v}zetG-D4MZrMS4UlYz+WIV-t9@z6t?HISSk3Ey*0*Xz!z5 zkvEg~m+h}7qK>#!aR4SPb1-ZtbnJxUZcUQ;s(SFSvi0(DAVzj4i{l%q)ym+@$Dkr& zNiWb+|Na2f#cj)hyQyM8jl0R*VgQGS?h=2d9>c3$!Opa&^1CZEcL^gf|KIisn; zzS8UTd@Tm-YtUHZ%3arR-%%&OxHM9fRGuZBxrA#X%`vXehJ*u1AqF&sh*;&z;>Q(hHAQ>eJ{_L6C$5%{)PcAuEy z;OqJzIFPQ%2y+bwv9s`ApqAFaDm&xr6rTGs8~h=aOz=3?u36@z?GG{r7*QKPtr!Ht zW4@b*6Mv5G@AqhmPePsVHrJ;`j++mZo2@Oi0XGX9KMNZ{pj0hB3ybh_xLs;~c>3$` zv=Kw8|3(>qlmOmBU8}p(*Y166AkJ{(r-Az|7_eecMe6R0%V)<3Kh&-6Y^rBHSkAk< zlziRK_P0^rD9CLU2Cz|WZ6>J=vhyY1TqBE|Z*ob4l!@)%oP7xgw_$!k_^50-9Um6b zO~P*u#Jgo9A!FOPCRrY<$D<@Wj1o{k+yW7jgm=zs>Q>$K9I zn}PSMt(t??6>76*U6jVHyuRI7lYwvN?TRf>ob^s{mD6QfRPqt90ut`maDRMs48mFX zk}t4Q`wMw_WWn*93BXx84tv4R%FR@+H00H!baKmwDEg)80<1B9w2h9L!f$r%o~z~a z`=$Rer0VoE(tAk8`J(E)aNiIZ6TT^_%v@d*9)DX3KD%f*X82WFC=h~zetgzOirn1)5$fQaFmsa&?y7EzI$LP)hyxBrSXnvaWD0e3 zRzN;z5SF0pmE&5eUoe}mr=z%v=P7$kT*M><4QBJGVCqWY2d~{7v%njFSTaf>dHO>0 z)eT?p(=sG22G!y7x;d3P7+ooVgAjxo6A7oZI96BobakG*i#thlvhFLj@9^S#gMaq+ zw{dlTw%R~2F?(1kVp*IT?#uVrT8m^Y3gH0o)?@cwE2qnl;yBJu$MXXYAX5J8G1m^q z^<`(kxK>$cKWb!b7}{xP&$}wRi}7~c(hwLwT7~k$zEB{;q@N<%S`-U-{$IiOpYqKQ 
zLxTREDKeh2&|wp)LQJUy%=r1JpL1r@TFBxbD5X_CwSASCDpqK8RdmIEVssq202fshT+h_P`~)EyB@9%IrcxXyIXC(7w4SahCFECM z)!tvnK3hm2XbT3JGTUGMmo?i4`j=x0b5i2xMojoABkbgDZXnBhti=_XbEh5~RtY(Gg&Rw}l? zIQE@METeMIFKcdz{DeL^lIk{8lvGUT<}L}}g%8BWa>0Bd@#>OYoEb;B=(y-R##d}j z=Dipm-#>FS6nrnOZe>SxOI<1AeLqxc>)TX;H>31)dUrIw4-Lgy*o&6+gviw9-hd8; zUYqXjT!#tX@Xye?>w8YM8Bf!7emFnt8XD@PruA{*5G`1n>hg|DNAc?lyc+d9zlH|i zRW{W7JCaPG?85?PR5)wt>0*)(!q;(~A`Amh@P(@UPjCYpe{!BbXLtC)Cfa$upAEmpKVj4U?{k$LPBxlQN{Zsn~cp#L9MU>f~SodUU$u^-t z!zzJ~iF4)>p^=ar%>%+TGRiD}BjJX}=I>%x=6n9ivY5631*+NcHwleaP zp)Ld2x2gL&2*x93eJxheezpOeQiFzZnah|Qf3nf8n6+ThBttY$x23fs2V5h=fzXtUWkRZ?Bsh zVyT|p3HCp3JRrx2gzl)~Sdys=G=`c?Wo!y}PZ){O>fFhk#(eN3NqaLkr zh|IZ1Gl#TBJa;-wHO%$>k?#6_G09?1$meIkEut>IsMT90tGv1(L4mO!rakzsTTwRR z^_7(vrIIt?=^mWGUI$fh;R<{#wuF)zADh-tg_|mf&P~JI--@h$1a&?_i2}i+c5tr*n%*&biHxhw2ub!H4a|_NHjuKOofYIYz-t(x-m01_Y z{tZ4`Om~}VSJxa|DTpMwEK}5tayj9? zjYM*VCZ-TRjZRH3%0x?}jK>I(tGHsUmAWFMeFNw!}Asc9V=IUNba->$!H8EX;G!^s?>@{*1FDS^b zJ8MjiDh{mucyje?)2ax#;>OMtJ@4+*XXQ7&@c6(`nKa z5XP@OImCX8H0<0M+Go)2PNDr)#9x@F_k8_Z37!ps3)J<-m>fYJmY=BI-C6nNyJwA* zc6Cx@luw3akdo}T@kw;LCMOEYtM%ONEV?0#>kpJ&T`_VDT1rfphyLA{kvg7MK3nM` z-3HAb7JN`C)pT`o*`F6hGhqz$%mR8CXc)+;qg-YiR7s9Xv$tuGdTkny5;ef_^b8xXJ&*tkr-n6zg= zEaY@C6%G=rh?Wwq*hzI29c(uFF?%)(EpL9@xeHytgtHCdp{$y~MBAZAxsyy@^0TnTC?llfM4h_4+7tQ1|ISSvs+2Gpm|L zTOLl$;h&FJNh$Y_;$rl$T3EEN&DGhee8D?JGGSs+4Gjbq4F|TaB7qEAc3k1O41tE2 zA7EU~qgOHmVIp>BlxA_>*JD-WLC-ayKn<~AXJlc)h>8y?-O)%&%$82h!9{m+-u}EU z?yFmp0p{FWfEiU2q^p4D9jC(rOavfYOK!Ffg&e?%gb~$Gmz{MB)%$8atPF78B6G}Q zLY~(?99D7eEtU8pgPf4$Ta{<37CCG(DYU6sz>j@P|08COoi#<~cgNDyc|zj6J4Dgi z0p=h3PAYz4x$BER_SelIDL7J@kbl#Mtv}+tm9LwL|L8K>YfbP*xMWlg>2Tp{U zcFK_y8L8j5ceJb=K~$%aoJ?&ggldXYez0JOOMnh0D7;E1AeJ-6@Pg|z879nT%}AD) zHovx%bHN(f?*g;L=GX_d)}h54IWDT`q6ih8V|ob&wCs8eY~y2VpPPtThE@&2@j75 zc=>{3gBG$CYiXX>4A!XCH|cPEsN%!8a~=0#dVOb8vJV3gx{!Q=fEe zOJx)zIB$A8@!Mx>d{k^AdDvt<{DBH9`uy0})t=ya%l4zY>qX5jEJ`6Gm<0cxXvJmw z4&_Xty!}^QecsC_D_wzc)@&(zUqTpjQvm_h1s>zL$i6;Jn!8TC*OW+^i&|n$eEkVd zi)~#4=b~il+C>A%C?;JMX*9)+i1?P*Acq<47*x{dDN_f^70dmz!NxY0^6~ElBkkJ6 zpMCb8h{%|EoAimS_7flJX)$T%Z}{WVS9CT|j&xuDj(#rRfSsZr3=odAtc^e(@}y~!Qs3G9>Rvl_i7cMoUze_5zIFXxc{~m5>i15O9X)%{TO}`fUZAoNY2!QqS+mumx8c$id zEDdbn&%IIq?~(5?g9i4LhOjI?B0;4MThx(ZAY!HeQK$QqTXT~TtNE%pu?EsLv-J7$ zi1W(Gvg~Ick+*w?_0&Pa>Kjw)&cV{$$(9?CN7PlSu|SqsF2H7MK9B}?IW-Mk8Ft(>(N9=hc>qOfdb>Q}cSJetfcyZ2gU%X)(kl2Y zqjM6FE2Ux))3o7reIcG3mm;B~+0?Ig2tg+dzWspD`=v3xo5eVo9NBNdfe(B)K?vfj zLXCvy&kDe-xs3OeDFS+O@j+O+uH-2@k`uRHw_DD&*E=bdp!uGn;SdgZXc4fbCXb8j2t zYxWEhhArW1w0$_G7b;I6fh}n|G$HjD_K_p}=JMK8u?(I>Ssc8nZ?`cUsRR|;YF1hS zn+;k44%g46{YO>dI5tb;ojZ*(nzdWMF&6iKg09&z3oDFQJoe7QH6Aqu3fOQLUcmfH z4{fFEoDeE)lZEY^z||3o2ipFxvith!^Iez5#7v_Ofj5rHzS(j$g!gs#1g7m%*~KeX zJL}sYnDEg7q7;(GiB_E$l!-ZeuHA>ruRd5;ECOtd)I>sYg0{H{m!er};~l#<-xg1g z{Ds*Z{v77mMMos7$25ZBk8!(zB2X)(r7c*e3WH)+0ZFmSzFGyH_zx@b<6<5-fuCE@ z&csfu+xL7yMkFs?Zj_`l50@9OJKR*~e$~m(*c6AX^vFR2t%$uH8dvh;C#W6^`VV zu@kiRz25<)SNj#|%D)yV;FCkKCW3>PdX8!7D$@%$C1RhUbZ@Gqk=?|J#dt!)fW`1@jaxR%Di0U zEIVNo$Uofsy1MdQa0OX4$eI)@@<}pJ4sPFF}%vh>ywW`Js0qMh)Lsx@-vM&z&gvIW!kG% z?RU%sxu5+ZB{gc>WPvC`qe~0N+S9t85QBjfEfhz^Mk}LOe=sQ~Lfz4|)!=qRz+>zG zJhhQBNfLirx=}NHA2yAho|{(OR*NPyIQvNZFwh5Rn81omN)Z0#CNR83^2m;(DQioS7T@4y8=BvR=S(Z65=cR z#Aco{3uMf^Z2N?6j?ci2VIr9|ufN`oXM4y2Mhm(-xAuzgsDOFe3Q zPpd?Ri#jxboj&cG4M%fb&tbAr;cm=fpWKZAnm54tGs>EM$$Mp_slbL^+N6KY$Qho3 zg$q6fg?KBhz%KgZ70r?$S@CVvbkrCDIXCh78g_EP$L)k1z))kX@sjayNHYfAA%1CXHp%m0o{Y1; zXIHe;U#dR?@49)}dkq>dYG$G5lVu(Ke(n`gJs9dQ>4e3zM9sMAc)gffE65 zKnAhwMM!*TaKC{NE>O24T#;_?H`sujr%$E}Ka0Q^Db2+lw|&DU+2^;@$96{{)u{Q{badkJ 
z`7)CNX@|l8#w)wbWYy9HW;fV&xQrieUa(^~5C8mLSers6`D>i=vRF6c zba7aYQF2_6!8Xhsbx{(V3u@sSnQ?&k|v6SDD{KJdo;0`d~)D zi$-ko@!pzNxsiK@UZC1%O!iG{WUaLw8wn^#{A$avO0Rv<8UIgYq{dMc@C|M+jgReB zEL>v0Q+ej+T!USS^Q{^X1ShU)sF*sjQ>Za`3hUS1)lfcr|8tt?e^>xD5f-?%1aZU1 zIoz)M(+Zs>ZN8!Nouf2$V;FXoEjMJQ$T5tcsFxoQ>Q3~Z_RpI=@ug;GAbBHl!h*hD zk8yt7wF}8#nIzy-Q~yB{H3Pib%6eP%M(*bXT%dgc#w?J1IRg?>#czLNpT#sQmBVT8 z5=YZ6^Zu56<-8;&e4i?ptXeI71#5GZigO5lTW+6hp^fGSHYZQiTLgS?(Ffq@Y3YNp z1@4wi&k1Dx?4>!`2;@(cKj@mwEq&wikYN0gR0J&(M=d45dvx0d4gfF+jU>je-*#b| z>p28C+AZ%qviYe@N{7zP z8}khvBl$~&j5zJNp3C2t7VT{;ZsWhvP>{D}B}8JJbCKjl%Z#hrxQnlHGiDd(o~~Ji zwr0kn04jyt{7lu=p8nkBCnxr?jxSej11|0d{Tb7lpm7PsW+{vI8QQLSYOG43V4h$I=!Tl2l98XI zlK={o>H~mdQrR~K~XQ?%}l`60*bCIFT;O92G|P)srkAT)2mKs-dIr&4h&zILH>A| znu#K%lPP~XibaGf#y&M3 zYVzsq%E`$o5(tJcy>|AEUVrT{S4qN62lIAr8wWsWY3ypn_}-`?tG6cl;RAM(aiQ-V8P>3H1zuLbFSqw!_IauVHLo zSygshtzNW1tp#UvCOku4@755RaX~-1lgeFg91HId^S}P%1k=O#pX5Bgl~GcMKXAh4 zUe5N17?a&$#EpoA!TcbHmi1*I$>pUM3`NhajOE}-{MbY+24b`o}&Gjcg}r?5>c84ceTM;?bR_r}0I99L1H+*4JT%>16^} z$zmVjQz2JRKQ`(irGa%U?5rn$&kcKy57J@Q2B^-l(U}K@PvHr(kjXm)LK&?Qs4#IxtHsCys#<6ki z2APHS6pQ?)5p=9`?JnVMWT83Q;YJRXDafK*1_LPVnBnNod0HLxPB6_~RoYklbc3yJ zC8c=9kA^=#T{V74)ApUv6EHHjSRA(`lRqAnjWJkX&XuTEH#`^i+*Z<2$SW*uwCv}` zB6nR3e5~TXVBPYY^?>nJeB@%fgA*s*a>#@RzG+~4+*NzPSfXXM3vuxmXuRM=`GY~7 z9fk%U;ebL&&pB6{{9CFWs-i*s`O4U2-$bEFdb6SDM)uQ&zF!4LqD`?y2k5=EB37qloQ=A-!DS;xF=Acdj8as z152%(#;)E#Yyv^K>ZnToH&@cZTcYm1wH58k*rp>cX@lqZePI~!Z~MsUYZJ3zlZQqd z2hGBHMd#x@DB)akB%PfM9J1ncYTcUZI6pkN6N5zS-8_qqIB}Og=+KldI?an>m49^&|g95oa)+bWzNhggN=)JQK`AQ zVs?~_ zrM=YZYoif*bw40TwvC5Bs8$DQ94AB#QdY~+#eUl3%1o)!gPNQ?SVlUGIaB0Dey23Q znU(IH$K9TlE^||Jb#{TmfOb@afiBbYPq3R0m}X3qz?Ek@n@2FB4N5|nbtXiOc#-D| z566CAlgfhmqrVmD?fXLZazHYiu;k@(Yk;DDYEa9@dSsne$h4w&puu=uyi3FS2Jjo# z>|%{@m`Vf`shFfbE5HI7t7kopGrm4;2agbM$*N41Vw}utV&col_f_cztKAtP6F{}` zqCXt|vzirWiPnC!b6w`br@GlpeY;8na0~(YQ{~PpZ&X%*=gv#OaJaJ+UTMK2X){gO)dYr|8+(UFeXbr~4;^$PlECt^uxvI5 zYs#P}r|!7V2*$cqCWz|+*~MNEu2gq)v#DBz&0Lh9bPj0+BgG$gnf;TUZ}mGl9w8ZR zp(j~j26r{NL@g#AF}E8A^Rf$N=vUdp?wRe=`~HhWxLSN{HBufL)@0CIQ+9sT(+#Cz zKXP-6)P>V1Qw#4*Yd7u7TJ7*b*^OYPE8$1DI1_({v@Do-EFr9#;|^KA7f@D2?PEs( zb|MtB+N=_jN2vTXk}9T=G>VG9oSy3FsGF9>FtPIr}w+JN5|N`zqN-oTZ?VnGYzY(bie@Z&Us48 zN>e}hc)}85xU$^hjx8Cz$NI&Chf*RlDdmkf?H_3`*8?Jay?J(-M4tV!Oi~(ocB3t2 zDgBKEKWCoTrTC*dzg=pQBbJ7OpIMzozuOylEy#aTjqScd{OK9htBMQt`LCw?B;SwG zCkw3)oq*q8VH4(y^UNjWrz=hgH} zCuw&p$%R|zUKY6z7mIq%D71V;wPpWQP@Y!9{RXiFk?0@J*vUfg{QcMG9X!xN5$pJo zwusWH`n=pvPM5tq#P5Ars$7jXBP#QJZD*X{z{EPe9GpjTysvsq&mtFrgt`p=)ihSm z+|xPsIRa_r{%V^z{B>VR(es^w=w3=KdobU{VLsVr(*0FR zsrqM!%z4dDcl?gCeR+{KJ0E)}jWh7>Iu$fiymhY?ssWqGamCRFQG){2Aw(bMUs6`H z?w^W`P^Rg{&5gfGU(HPBUNtXC&MH{*e2H4ChWDtTf48xm0LOren_Ic2Spm@(+c}|q znfE@bofNrY+)s-0=yVZ%eE+<$VYoVI95VoW^HQG+XG2aR6qH#xSPlQvnDw2 zV2~vh`$oaGRFlDXI+l#;^X1$eyN)wQT*7k|=kTH@EEo22ZG4^5dUG#H1^l#&=ojWk zSb$l)AX3{G0T>l8^l1_>Rqw=O=yyT&PHwP#X~;0RohH`b9QHlf2^T@VBwcPalIU=` z*y$>cuSMEv80h9>oW&Qu4hFe|%l{^BIXUX5)5|I=;~C%NL14bG9Z7X0g%>=Td5G@R z)medi5Yp-}z0ol66*2tvN%XVn-OP6Q%k6Ha^H_!RiHlz(*rYA$*&8vcfYy5TDhH!1 zV|3xuTie}z@YU({Y4w=o^ZlV+X;z!J`+8QHIV#D+=G_98qY}!CX2Wrt+IB%%q4%lM z*zta&&pkH4yw<|zws15_z3YI8VZiP@8-vV&5DD)$KixYr;HF_K!p(KQ#5B}N#Og8B zo}i2Fx!p0M6LnJAs&SsspQfq~YTmnV zMBhqM==y%RW6e0XlP<)dORXN3EkXpU8}b`j22<)*nW+Pe>I_|2THB(BRr7t~&1!e+ zmA>lAMZ-KbA&Z+k5oQK}4a7;g)AbggpEnxIsydHPajy*2r>$(pF*4sdFF8csU-|M- z1db033=gZ)Crn(~JQ6eDqzsv5jXU563L9$iPPDE@ zhiO0eujf9MVK9J1ks^Re2_%X`D{Gl#17S;f*JmpNDTEYr7xgQa%UfPui6gILMdu?V z{^~1pHSwf;y|*mz5lg{IsfH=d#gBBFWB*4(Gx(1ys2< zqRQ4gKT%eZM5$1fXeL7-@ExNR(`dNSa_dlKEpVbVT*eYs8~_B<`YUv%v6N6PFE@Km 
zOp_n3oIVS93aE^gtURMQ&^-1=x{xlF*pcr>)YTMbfWPutW9z8o_|eNZdo$#mSzWv~ z)*YG}n40*`8bQD7qp2F~PvgJ1YPUSBg_BnByV;piyBvn%%f-dD%bx?TFj#nedj0;* zMb7hUCvMna@qA>SlafO&5F7ry8`kf-6TRrT1#X*DtATfnjUV4QQ;Hg|URhZ+&*qg` z9W4mBb7KC`X>v~#=8sSUQ)kfUR%)^SA+ z4J#CP5I%E>&O?(cLzPPd8W7nbA0V9D?)H%1aqp422C%=cNE#oM2AFfAs? zD;hzzi^v_-i<+*s)48$L=b>Xz*BYuT5TBi?bL^|H)|d3vmkMek$DWc{MRS?>W>Hsf zOU&}J*bj8MS)OdZMdtIZ=HmCN>L!%ZL7DoO7+>-4fAJnvznV}vtOSv4WhUlO>hCj%)70Emr1aD z>FQ1D5_hzc#`k?_(!GWji;3%7+YMBr-@vnY}`Dr#?u zR#OynBn0Cc%bbKC$Ea)1;>sf|jb&M+Px$1Zq1l$HQ>s%*sDeG($5|z4V(jm{8_45z zT8lU}gV!T#eBVa>MOL=b?^8X#yA5m9p1UzZt*MNsbS~E#J%P&2iL}t@OBwCv#i-+D z)2Wm;Yf}AIcNP~9u`*<^bt^uS_gexW@;_5__DG?lTHY&UqN1ZS(|gb?!JaB;o;l1*vvW2>Ue|0h}0fHjD;Cqs~Reqsg5?G zOl+?60{!` z;TW^c#;#wG2fIgWsWub+5dbrD4!ron$91W%QgoXx6x#fDjEB zJUXJ_`8l<^@2tYH(%yQfEhsDPlm4}5hCp^*-ts|(f>NHlsPr>ZD=SmNN~?@# zTyO7Iy{@|)IX6-cPfTpAa;MKz>IuB)40QU~h5VwC-zoc736KC0s^PNBmBPbg_b3T3 z`B%0pqr#KluenDc*Cc#)Ux^W;^xt&&EN;Z@a|w9_VtQC?NpygBSWdb=lfzdnS2NTW zwgm_amA5xP)XuJo;a}lhpu*Q+!PJ3{!luFAZGU&)!#iO2v^@W6IO}reBKpKm;=~Tp z(zO3lIw{FEck=4EWM8;Ld8bD)I;w}L=@TiJ^f!7 zOIyy={=sYobnXsAp{FT<1alIP&Fs-&0s{a<QSb?z$;fj0 z+gV7_LR|kB_QH(o<-scJ1Edd;S|R+7d9|W~gIQleiPc|1@A0l`l|F|S7RVlI^WFC< zl{<#&&~A%y-PY}y%{4#lsbFJcZ=RkSOo2ZXYa1IIuSyBHL|@y`c&5^llO%J!Sl8W) zL>wLnxqQ1FhtK|ji@PN??ZrSWiSiB(2y0eZ;w~4&($Jg|lSIk^{Z@7V>U!H#X46W% zH)VElcV=*j{-QRu^5O4p0whna#&V$bhQ1YkT+pGa-0cRM_^XpJq-C`!@K+0o3K5kl zy{}?g@U=e<J31XwW89Sd?Dg-YvS7 z(4P80b6cC|*RQrqjdlyQX38Zv0C2@8WhHp49$Ees`o_pv|JkxqpMtr@v4YYV2B^N5 zAQ|75!BKBGc7Fvx?>GQZiA}-5CMvN0voxQ3@!o4c_EGek7}GoW4|mYXyeIvkSWs3^3Q|Q9dTGo@~_`yM+>R0~+0Q0)& z1sYogpH`%}<~512r7cenEV~hmO;ws2;Sn>RzhOc?2L%Vuy5M7$R)Ut)&wNCa5TX1R)j_vl z+!w~&@81o%vd=FpWY%lt#vzjVAJ3G6WxpG(1IQ?30 zyE2r4$Z4V|^tz3p!o7w|B9XvDRZ;g19PwN?{?Q4yS%`{Q-*U=+ji zx>axxK8Gef+_}{%@2kNF-3Qm6tkl?T&Mm1GC5EG*(07C<*Sef85B{(l=ZN4xUiIc8 zxKJi<@x0wrdvV-K*b3BL!^_~Ozh2CJ6PgH9tm(mbW7J*-N@5}>#7 zm0?vBeN4N3E8G7?@4$0Y27$9yy?R9<4ARi|fp-WQ{b$RKgsJ8P0C&iM35*@S|w&C_;vI+4&1b zyA*9rn1`}wEG-S4?*+f+@I&$83l8*!7B3Cj=JAekHZ9+R!a))7KZZYE^!dMIJ>PBU zK%$3-2Z#CUk9iZE*3-1Ctcn%t9K!?7@1~$>wNd}%%i9aK-rGFre1MyNSA)av;(KwI zJs2XgS;F8#&u@yjjrJ?8UiTO6?XW+79gJuzYG2*?99DU!C&znZ?>rIGRqbQF3#5OK zlk(f_D|+x>{QAPr!J$&3Dlt_B?Ey;5&O$djb=cqmPAo(tr+^Rwy#26bOWBE~*DQ3V z^Ae)M1KEisWl<5{4A7D2Qp30JRc?>VG@|C`XKF&aL-_ivVFA>2otCArt(N_LYXra3 zqbAeF>lYGnpJX%85(qmhP+~M$%q$vRUv{uVP z3Q-vQn}SyQ5MwiyfgcPC-i$vIJD}Oj7T$%l(o2)ed|Z!i%qVv;L=^9j+O_SE0$rL5 z&9lZ-x75nP7Bj!(-wyetmzXfx9`JwssGF&(F;h6Ch?6vipk1ZXxm)c8wR>`9D}Xa; zWJeiiX?gj`bu5n+j`Ke!EeCY8+~T}Dnh_^mfZ9X#{{5lL3^?#!za}!swn0}8j@qfz zj9wwm=!!#hPsR057YwSO1bHVMB6_==E9sieQCNa^dUFfDISUjM{?83FZV`zY)@O_@ zYMJW80*%ATLMHYGoiE;0j<+TfYP`p_K`RKwmI@kC_WIEPrjt=p)Q7BoahLGWvzq^R zfsYSnbJ4I_n3V~TfN@)fBy@fG1uVXWdrm)j!@1ryeIuXaemmEDc>BTbWSw%=DZ`VQ z_8^ndW69SUcu-zcrOEXL0dKKJgohx+F0tjsx{wm0G7eQC?ERd2bR9_LaJ;KQ%0VZ7DP9-G^MX!3Hm)>D=v~ z(hR9WJFg4dk;BQ#z@%%Kg- z)eKbJiX7$tFK8m5uB@#5d@K0|nwqp_xRuZ3#u-tJmw$tsT3NCWO@fT4Nz2GAwSi8P z*-WfkDUraFt6gO5fBgH0bLBi&&)%G-I3O-1gL+T!%eOS0`GEOPe3Yy;m51}iKaM81rY7hqX41G1W`_F~_}O(! 
zd75k{TcSSnaO~D$-%#NZeScT+VX4}LCpORofAf{=^h=z2)>l>zgmF#9l4{CSEzm|l9FRI)_P7+ zKM?$Ia0R7Ru`K4yeOiZC9JLTQB+17l9+UqN^KWR*O+{1ly@|^yEahP09B5HGBc^CJ4w#wk8e}>R_L)iH;cN8yp3q0hzoS5%9hCBd`w~2yntW7LzXxOodh-*H+ zi;5gp6lUUbI=8nqxsnZnj>j&6C$IGE0zGlx@f39y4KI_O)?F-5^1nn8IF-k!WlU<& z9dw@V9L<*d$AyQCM6+T7w51#lV^JkXV6erBRH=H^cPX&&)KaZInSi6-`D%Y9&W&ch zL!Co!xg*0U0}w`HVY_xIa%%ynt+^hK=nFb~x`u&X(hyqAx6>TtWtGtX8Ryc+ADYab z{Utr!Us!lUql=DW36`K|_&kO)WErM6s{&RZaIUWXr z*CN5@!S;Ev-P4Dt)6KVA)$=poKh74N@Om%Q`$p$@gK1qMGuh#6cJI-+@xRPN^Hn5i zv*AUzr7C1tCn8`{00Yn`Q_vZK>0e zmg*f7pzvWfz~tEF0SvKV8NpkVXMIWfY+B1E4?85#SOk8d2!VwU{sYo_Ci>lui`KRE z`IWtKdFRtte?Wz21 zbciP$y}e~QWinX zwj4JR{1G5{09EL?h!~20;9gDo^!F%;Rz<@2gcr(bNO$RlYy*Hc(hOaCU#M zR6Tc|1TV*xzXuUDz&yM>$ktHMo47X4zgL|i4Ug>lYl{UteBmsA!Ka?8a@Cx&QdbE) z(l>MM1Na%o=p@)8FJ4G+UUgA*FFdUV&hHs=6HGsmthTv|eG-NT#3OzAo81Gk0l>sB z#aA~283_E5p0VO%BfB7|Y@hdDj%Z?aWX7L*M6TJx>oRsqD(s@)wNlJNmVE8+&kGXq zO=&)zter235V9P>ww&$>M^W>D$Z#|S-evXY%XL~g2j6km;O59{OPj{vZRanGG}%AZoY8|||5@EqtXI<|HeL@iE<8}<^bY(#KKC#DQ<&)u z#Ri=$hq8_DnP3tS@H%evHd%e7;%hkJY4SSBS*yaIyRwAt4r{QT%GwYD5|F2|CE^jE znODhf2&vfTi|l+n?k#RLgP9Y1MkRvubkK)U(1u4%&!SuMk;00!8v-u73|v`KZ~hQ?z`A3teS63= zPM`=*T}b_G*qZD7{MywF-L#9tXcS?Cb1U3FaRKJNgDTH^a=G%Qu{FV=Hrk8n#7!;! zZ;@qb4#XIUH<8P=QH~*YdR&!8_Giyvn1u)}ruPb+>SX8zFym^aUW$E@yN~xhUx&6- zEE*-Zv59qfve21Uf(E)pMyD75Iql?r3)MdhhTnCqq3YMBy*KS=#bp7XP3}Qh#-uN`@l37uUb1rYx1IS~(7DU}@%Hw~~z| zL!QRyzV_Q&t2KTIN?g84;ZCd0^_?hGd!b?N;P5%pbony!jW-ylk4D;=fv!S*)Rdz0 z{B&km*hh@`Slg$q+X+t1;a7D>gEXwhSf)D^xf}^nt%9s)tFczNnS@!)=;E+@Vy%qr z_(}ZIethAe4G>fT^lj4rPFDlb1{M1h-M4jMcnSy4hu(sm&1W1GwC*ed49ZSPbr6L&z{3g*%uz~FV}=2glfh!vLn6EYKXW*m%rO5Ae3d) z7QSO?!dp-8+r{_t@<-dV?r7y0f8a>7Pi~$T;O+D7FuV1idbhorj!n)!1mzdv9n2qw z*sr##_kw2MBV=_rvR#-fO4k%lPFf?Zm2Qs07YJM(>|YNQ+L{}C*QaB&%d0T1FsZa4^+C@^> znJ3r11zxR+yCdu8gf@>!gI!Go9s!o0^?PIS7c~jNZu4)VBjZ_CU4k%rKKdPXEnvl8 z2nEkz`*1tc?(GX4v?ro)vJjfXyF}sR<)DqS8f_IUNoJGggoJ}q{5ceWhE7uceI&_T zS;*d>sUQRtn|JtOy@cCZl`VtqM{hQ4tcKu=DXtqb@`T{sD4+S*ayOI_YQ>?1O&8Vu z*-r-Vj>G$h(Wr3cH^wU1VgU+A(W5i!x^46JduMKm8*!cv^ahlX>;|6KdpR*t9Q~EN z{2$VB(o(%xHjX;71O4AW$dfwVb-|653=L658=aq<&CcRVm?!$;4?2b+AaP1L5YY; zVCdjrFeN0KRRPUzYz{<|^L}~D#8grEaFO+8$D={rW}jxweUhpw1_7u*&&yIWD<27M zLqUQ^1@_RWIPl+067*OQB2-Kf0`s=$T@Pq}CmlDVSN&#dZO>_94{!i0>qeFs<)3E< z1Km@%iQ&SFfFlAz< z!XUzOs8--bX#$-QkR-Kcj%`li9g2!Al2^I_t)N}8yr9xxN$YFo_4}0^*vCF>0lLFv z4uQ*_R^za|8CjVZq#K*=bsqG1@gFIR?&A>V@HtVg2hDsl1vl;~6>b?(7c9;##xUPq z9fyjzoy6#M1gg=}O^Y%Kyhj(LB`unr@BbP91Aj|>rJlDg27%*OWXtWJ1ZN~`t)mj` znj)a>kX)C0muJa3fQ@twn5%27pnxmb1_SSW7))3KHv&Yk)nM1^n`|MULI}QTZ zXG_P{yDvWUm$1ycONvSsHsUHQ9^O`Jb z1WmkJX{0)9b>@+A;A~f^t&XQ_FkP0s+-_a{^HjRPdG1I^%U_X};9*?o6D~74JF{d~ zywUkJ5hZZuAQtSFu&71nd=ZJEkE!Q%aVklzQ_o6mfHqn05_i$;64m?l`83ylRWXHfy9*Oz6vRZ~@)v84XW^?R5?MyrrT zg#iX3{5>>=R$K6i!iBPC-lt)$wrE;ggjbz6Iwoi0e0wGu zmoM>e|8p%v?yy9KdtAQkQA+$e)Z6@v6@<F&tbANvE^vlt!px*N01_sfvHYPROmHf8zQdVSeziB#VXH8S065IOus;p zB{Mrl7plPV>`|fi5!tc++vMIs(59;wGlyM5fyB_2okCSA9bsK>YkNp>e+p74*2{KV z&n!vprFE!FOtnst%|HEb%hDw#N5i zWDw*Gn)>KyOM4fKEIMZ;=scfW?l^@5SSOqBa3aV2eoxgY2>qIyqK-BZWtT?!|5u_@ z0XRGc9)_7RgV!;|(_YXfy+a)VHf0XaSgCMbumI_Rjh8Pu{Y}yo2HIKoeOrJ2h@a6h z6HNJv93fuQ{@Lk*vZ&dsXKaEBxEP}Q7nd)MQoZ+Q^OErO#qtkq;9+Td1F$ktk}S4O zr`mf&tIFk8>?|7iwEicL#*Huq_I83qb1cUw4q_fp@$k_!@nGLc;*a9e z!i4fKll__6?)s~NH(Z&0c_8VL+qBz}dc3eMzsNbK>1^3y_om|^QJxVLX=beV8iG%A z$Y}}~gw_qt3l|xQ0WTQ~H_us@c=r7IAMhqLu@rYD(_cFuFXfL# zM2V6qndg79eR3ay;6SKDA<$!-Gll_2GNm0w!}91Gy|XZFqq!E#JUBEs3oWa#T_&zE zJ}uhu&Az0)TFZ7iw=@3PdwjqCCnf1s;Ty%G=bbqa<=z@6riuy<=w7~8!1wOQ`<9v- zJ6g-N2OTqEs?i0oGmF3e{raQ`oS(1=4KMl8fr8JFo3$&^qE3wC*|Zs5%p8+=0l^<0 
zhmHQB=iBitfxMI?+4zO+mVhcY57I(ki7Wt0ll%vSsB&?qRm?o^>ca$EW2QwWG5)nf zd-ln=j)%2mwa#N0KPUjH=4;PVe|{H-SMg-#bDfu!MLD{wlL~&MYw#FF&=`oNEW_QF zQ6*|jL3$mQWiYiIpPW{V7cqpWjJMjA`&l)Y4p94zN8Cz4mQO^8I%W&*pcKP;S}jnR zLUNvu56up?AXB5#fbHp$$>NHmvuSmn5!=6v?Ka3PO(l?aQpt6;DK^=8FL^6~4D^;8 zuH~LQ=$F+GLQB5AAV2cjJiq8Z1>hp0A8h!dM3vwd&;_6Y9Q`=+I{CK9fD)I?r(U-oRa)&M3cR zpu@oJD8ac~X&FU(gZ#?nXN!JF_J;SEGy*)9&YZZdw;R?!v^AtE8$OEJe9<2^pkbTC zA`sRSUnY@4K$u1b9@SJPCGkXkv~l5n;UM*d4U$f9&KocBj^Ufer@}V@5-*MjD-0rK z(9TtW(4NQ$+deOuif1Fmxs6*v41RRppv;6yppEe`T!N}RQu|Tq8<+bK~UX5`D%4Y-#Vwmx{j`DvwdF^=hF_)*J}hr zrC}KdGVJc<4u=M>U1uq811E!U3IK5C)->Nyf9j9EV0vSS8ob*iqx4pd08P*E{AUrK zT9U-j=diAwAiZAJm>;(^Xt^(psWxcm4{n0*eq7#8Nei~l$5bsV&(+I+{y4+2NW+p9cUS+^sTDCh@(rc_rms&im-!kl2l`)OC(6U~8%BxAunUv?8}< z#f}2NhsBdsj$0}iztlsX)A5^3bEx7z{#)nO6J!Uf)^T5T?o8vJ916`FY4!9Na1A-_ zv~lrSzKd*HovADqaypASkIZ1yoNONV#m{I-Qs+)-N9LFvt5k!4c5c)dc!c7bQ}N~- zwOETa@I6(e6CCfAV;)l^2w9y{1P$+4L7p5HoHwJP&bB9YMI*Xnvj)SqOUpfrfd%vt zksbBxs-JOGI#$p)_MK5q2%;}8-FSBColZ}M+HEY{xF{2?P$WC=!L5z1wK#0_DXJt- ze=Z=-1p&r>Q^r%1p`BA3j^iFpm1q_pStrv|=_N-6miJMI{DXZ`yjs)VoXPl} zdE^07bE<#DQj>f|%RTF900L25VowPFXO>gGR9XGDtAt^GREYaC=4q>!nbYt+lMX1I z>1mL)tLd1{v))&-i9gCsG_&I^-_Awiv2WJM*V^}%#>c`tMYSwJ(y?hCw+AQGFt?dL zI&@7FaWs*Zr#sd^V|QqDg*x1S?^B6$3G@M3rySlpzl2A0E5x(uWWl6PU!F3Fqorm3 zC+BOh>eS~;ZE8T)BK!ll`pDq93t4ZO^nP&bmtU4TIXcS|eqr%>cL4E#fD+I}U^sZ1 zRP-U}RIks$rVk%6Vd|0ylyN;(j2>C8IXTRrbHgyeNEUNB;8#<$eJ4v-%JzR!{l(Ud zc;Q3a#;BYS)@EJi@IH?Rx4#Y`TG37708}SgrP(AvL;u(`gJKEs2Fr6&2RxZYQ=l*D zYRm{t`WqK&nokdAj1GH83Ix)n9Qlx%;$ThS+HcTUym7`>%h29Z!jt&q({3F5T|ott zNMuD46|N@wyK0mt8W{Fj^-lKvFo^x3E4n0Dm;O#dl#C$1_QKF2 zH>?CUla7bQF4P> zswsJi2`xKx>LeqIDMbWQHJh?J-4j7dTO~{bbuO^P8a!*K!QGeY(vi!A-|6>x7Pg+gqGgqs;RQI-+CmG(jdZZ3?%JBV0W*~1W zOvhCGaao58ZM&oC=?||w8X*YUTJ@wXm-9%%MWR)i{#39&+MS7nU!zYn;hJ~LpNB21 zO9yLYnCqTSi$@p#B3Dw#h^fQx4!)P(TEB^3Q&Tbw7cL-$M`*9*p1LP!nlXCUQ%%(Y za#LWy#n|d<{oooJq|nJX6-1(_=2;r5&onX!M~7mmSHr82{_^;4#D3D#HlTcdGl^WY z4Ro~TGu>StUouhfWAtF?Jx`&t)whA{f{Lt%hG9RYI;a}LWwI!<#f2`x>0res0uJSm zRbc(Xul2ePH>F?$5qB}GIKnFCy63$E#wg_bhs0Nm#8)?@jeW5FM@Ulc zPHO3PYgY1?XPp=?9VrG5pI+W^1mpLY3*!B(fQ-ZDnTHp7EmUs(oamB?wrb39&2zgZ znx0*e*@OXL^p>MCh!t2>>nf-E!!olN#tV*D+OE6f$b)-lLG~;x>Y=JS+#YpodH5qB zrQi^~Ro&tmm-CY~P(Mwr+w@4p5;^FEwJVq*S)bYx1nuoKK zFDv2E{u=t)p}~0?0{!G7&%09UbxyK6-iENlQfmp?rN6?dNU9vP zDF&PMB0obiKg318;lz9V`4BXGY8{J9LQpNDXJ@`SQ6YIcE9oGp417KgW`4O&iaL9{ z_p9_%MV!OhPizeVr=sZBpIO>~-s;2a6z;u)XX@}KVlIM?FOaQ-&D>T81x~>4cOE5i zN1I9ahP%A3$mcIUhlw1iS|EmS)+hyP9V+s9IK*)<@%GRjxP1T-|P@&$#at~E0 zRIRsLA2lFnWsfiK3g+8U_Zwpev{;HAB|L1mHS392`~3klp@dzVeqh{C->VM)_KCEK zz2vkB#ieia$uMc~PPl@DG=1#La{qw5ERJAySC{(f?Nz4npUzdGFwD{L*>hG_nUQ5W z-b%lF5cl~5$TW4yQ2vYTASGtb z^BAh{KFulMtCKp@5%BH~K@J3LBp8Ja@KU|JqT{HO5P%f%PATt0Wl*ATr)6|9;QaWZ zeZBj_p5x&d?KNgnUnZ9P`HVrQK%6Cjgtm6N^s*bEB*9h?f^-icc9x5a@?2^gn4%@N2iysO$Ue{>SoSEAboHE z`~?H{oPoO$d{pS&uCt8H=T5%DdnMNhJkE;3p03bEym5feg6Osw|Ti93;)gP}e z6#tptSxYqChq)@cXnBdUHB(i${?p<@IMGE*ch+R3!Qs*sQ~XGZ(qB*iz|KZ8gnr3C z$JBoFFz<{3?L4b1oFepeR$WP%A(m*C;6Sl^*C8eu55%M!&D49J_LCk*i{q&B>BFrR z1mSDodR-njp>jNP=fT_5W(sY6j0Qpr5wdIQs#SrNOjEw2z zj?1~6G?s9O%J%v^1-_U*8QCyU1HkmYNKW4VCWo_xeWW@}*S9bC znM^7V=r+DCa$oMz+RIg59yXUv0Hme8<<{$_ip3T?1up<#G;=aGWtS_L3xUZY8wf!t z5cFirK3bxGg#B|5>>2M)Y_@47rf0Pp%2mUDAjBD10k<4_&QBnL@nl8*<*$lcU%|p8 z>9|J01g;V!4dEiaa#%zE3vsuklfkRHMU;o;_#`h}4%c&iC$9R0>mFC^N4)lYUEK;wA0dW{sR6vqytJWRX z1YK+j706*N-V=HkK}B=twjWLXQEEJamayoD1|=CB5YLz=2uWE#8xG%?r0$&@E#b7y z$mLST#hSI-v{zpj1;YS-P#wUHkNTj_&yRez=+PtVj=ZuL8RNEYCJMKuxB1_AT^*(( zk~~i|4Ylt@rPGwC5CK-siwYLS0evB6nu4b!{T4EMlR0N-NB$0Wx_118i!HQIT7*IM 
z4J{)}JKNJvU~PBJA9oDj7R=<#f^}tli$n~r9NBK2Xo0ucGP1wmD?>|MXNXztPmYFT zh1-?64b|779ZF7{X+}lR|6~oBQysng4)(-jIqHYGgjI^`VwPAKvyP=_i zG)mpBLi^CNA$L|wNmZ9N4E8m9Jhdkk=CAfO@^V(xpO}tbYb-CHPg*RNZ<&#jPKCx+ zWab#=##DK1PENWD4Vv{+`G%@%%YL5Y`VyC=FM2fX=4W*fRc&1ky9+vJe)c#W73v=? z3vxC~^^o64@SoIZU%t{MG%;hTbXElj8k;{Ob41UQMe!?e%{8#9$`N(`sMB{y_5Lz8 zy16~Ew^Wl?zNw4(Q5$1imYqs*~a&g<h;>u-|&{=*W%@;{;HyK6&xduE*1xUYk>E6lYL&8$*vjTwWD2!hwozl!F>3~^ar z^FTkl6;%7UJIK;FFxmu1?+c@Imv^Ql4zN4bGx%=9HO>$%lMf$Z;HfWt-GYtMtsYOA z4Xdadh#)x*WAf^%^a{Z9Qw|zIY)-?N=X9I@$Dbj+GpmP{~6^hPMGFfluFk%62wEhA=l~-&@>X|U)BpY-BdBA@YP!| zXgIoeQw(`4-h?E7cE33VzkAvp)x5rKzVcK;cI4@PUU9#$M+aE_6Gv}|F29|UM|pKx zgS6CPv!g6jX1sXtJ>T*PM^zkKX$pAisN+2=zF#;u8WgMeHQPvysnaO_gJmjlZiW`- zti{~*rYtMWX6;iqo&-(OZeT!MLVDRZwVn(VHJ^{$UX#*XUD!y6&$B(VTEi;4yHrUT z6bPmMs8-@7vp?0lO{S&_XtTnaS>?1R!V6RKgrXbLgkM7TTji`{=-D>oEyha|oV3g` z%azf-xC_Fur?iAUyl?FDU?I5s>DYPHJiXP1k&7Opi(0m(H9p^z*3ljCefk@rHH zMNjQ8)OqbI7+PAJ7N+XUE!Bk&Lxe#6?!ism&Bya#N5({Dl7LxarJ<}-{+mn%nh-y-58F5Gw*qPzIO zAyF6F+{P>0Z}3Ec^oUL3x#yJpirC(Ckb%~ZP@BPX9ycJ1Di4F(aiHpfMRi0*{Z}lp zF>-;wUZ=O$??4BKI=S#p#Y&6a$HmV*1S zp1jvcaLgLq>}}1nmDXu2G_M-Z)~Nior>&YrSL(o-ha7;z;N~w*e_DJ!xZj@AVA{$SPnzOH~vE7 z3Yz3WM|$p!N^U#D!Rw{*<8>fme2wtvFQQnU>fYSrGqoSdpXO{dDUGva#;4B9C7^$* z?hAfu%}GPe1(g6GQ{RTp*$m`(g;k2zMQ55z;}B?wcdlzppts5@qFc>cf|duIp#Js2 zub!WE2I&XY`V-42Jk)~!tWgu;YGe@ja53yts(+b1%YL}&rDZhU7bP*@K+Z|4VAV+F zBmZo^D@CKtJSxiO=a9`|PMYFtcv{$rn`m&FCix{y;Iv@DukAW{s&BXmm%#A6yS9|v z&@YW0pj?uXb|3p(kaP*T=9=V(PeE}-0C^=RJeQm_#@hx!yko3|5!K53T`@;X9&V5hhBzA=4A*eFEaKmKMDBbh zF5ew}7`iYnRo}f|ss6rgor0Y8mFTOnZ(ri^vF@?Bcq=PkUhNL#BO_-#KD8KEWr%&& zyg%;QG5`s8r&{PSrf&^mQ~(1e3_#+@Mg_rH`u36;>%*aUok>CQdVOAB<4E2!aHMO^RG`kSzTmDx+Yr^>}o@rye*G zasfTlh4G-Q>n{8OF$u`AA$R_`LLBvU!jB50!68@c|M8#yIr3`T`I#tLu)s(goN}(u zHe`vc)ufWDeK6;>v0@?~Mt!!oZVkfPYjW0__w{|ePx%hu-nb{zTQ}4f4BBAM1ogee z3=B-2&w0ORR>HXq_(nuU%xul9QV0ztCrNF#y*Tr?CPUm9k zw&mnz|EYNO)@NtrCiuGhV8<8@O|g*8DZRz-`g||5)@%1PA-$zOJQA;BZUyc)+jh6) zjoCtw(mCtvd2$f{)n>KZu2SS;bgGj-$Se)+U|rw&+umkRAr`M@K;--=?-c!|1d-s$ z&Zry?qR*`NEsdl&%*&_{SR7A^VqY@uIemwdy z2x%WH4HEc?_#E#Lo> zrnX(0_Hv5lcN#&n-!$#La}s;vS{{E{=D2kB!IkAhS}T)2eh-Thg%Y;D&(hZ^TcOHJ z_7%PeV|C}5-)A8a8Rl)YBxwoNgw0nsXLVdngeF3F{5!i7Un|j?mVG|~*sWj9oaKz{ zCT0kgMExwTyhBORTk885ga_w){F9>f7}dl+XBMDJ-*tCzNs@+$Y!zfxz8GO4lX-LS zbG|(GMgOt&oO&7s-)c+gW!56?R95rJ*;n7wFztl2n zJc8`DsJDe>-;iJ{a?kamXQ^1u&*D-n#H8rHQW+4rtj6*;Bn&*d&brYO@sFmRGK~w~ z^c5HYmK|2^=WCn~XJKS6-r^n-t7Z&N>E4?Yozr5gd$8aRy^Xj*Y>kS$wZw~e6dXbV z;(piLosXd}HW;AqBwe{@v{9MR??|6{uJvlbu<){@CR?teDVK~$Iy?^ibhGxRB2hH*V{hC+CWoT?@~}(c57x`KkQAnqLh2Yg7-=#qa)`MqG zgMx;(3ysLn!9t+mL!HlV6JH6K{j$5oxi!AOE})d8)K>m?RR42cQ8-0pcUWTZVx~yf zWu+o?`ybUMXpOxkaq7YgtW_y5#uFk%!7JeYY_p`H+{fWXbw+A>BXM+-v^(y9J$i`X|HowdUn!iDvhf*HDmuV;h|88NDSsW zy;xuRKH+p^m%;c=xm6hN-jeS{R$sfZFiBs+LFPh}d*}VCA}1tk`lw!V$87jP%A)Tx z4$$UJ)DulRQxa**ZycbIO&xM~n4KdWZp{IDirT*JGAA`vY%-p`b5cAmWNHCcEmhG%$sKk<_$<1i`h z=k!GgEcjt^ENod2WhADcIEr>+X<+D^9%8-ZeZ+mE%fQix^regPD>EXY-Ku6@ zMRO&MeX+lHY1x9*@yvDt6S59AWPLT2$4i%E_&Xs$z@3dbDw$pGvBAUO<-Ch&424q1Kz@EJfUZL!PBRp zW#mN*bCILsHWnKEO|*`K0ZGh(8?@J^K6jtkfbVLyNl4IJJTAGs-wN@0f01#gY<>2p zy)`5;D!@xQ=SZq7b8gz_!A|J+O8-HZsD4byncia4jAV-j$JQ^7(6=cj6ZX3o_$m0fjn35%_zSxBdp=A zuUoHOSG(Z~E;7H8iV}0!XwFJWqy`MES!$7L*$tmM1hNRZZRFj_+BxUg`9jV^n#lt5KT0?qk2{;%ALYm4qOd z@noqVt(52P9!_e_N{Hq&ObwU*1$!ue(VbQsCVIqoBQ<|H*YydmT=4{+CCuY7)zEim zmd^hxifd5yW9+Y1@yi%!!5`|J3$x#uh6xJjsxwfocAC(WUXfF!P%g2Z(nGxyXw8eZ}87?}-Cp3o)hlb8q=}Ral z5L`rSqpm`l(x0gi<+_6Lz0MzKMf4iOIuNfA zIGWo`G6yf#_+5&w!SawSHVs_Jj*Klm?Mt+ZesCk^<#5e*50pWMg#i2oT8yx^XaN<4 zpO$anqz_w~Q>j3bWC-1mM_LSA3Ehzq1{+n?SKW&wJnA+X={8BY-U&wM1V4gm0uUYx 
zEKZcER{~Ed&ps;9#@%fiW=69yBVlSw`|$Iawn$U`tT-Oj#qfFvg)yL_44TO$<}67{!&e#q9I8sLNRcYQ)?G*N-aOu z=07H(_6ShKaAXnQ_P$L}S^DWF(en`Kb~-d|w&XkXr!jW{T+N+%v}QV$t3~=G1wZC) zY(1y>;MTYN#`Ym_W@{qqXDe>14o~&QHlw2grq&<2FX>)(pY^#wVoP4LFshWR(oPLp ztUga_F@HPpy;A)3Dlt9&_IK7yi#ge*3rD9V+NAH<$Vu~Q%d3OdP2=45-cEF)RMwBk zz|Zw4o^#mpU?%ZVc30lKXEW1F!WuQIogRJFG0#{%bb0<}$Yixm@5IN+U#;HjN1LP) zU{!tVwZA7s0h4{QZ{0Vz&~mo3^v>rX=c)iEmlSQX4d>oWApHL8MZ^nj`307kz3=d| zy8)GVv_S8PQQbz*^qv;?Vp~#NRfEfx_^%#$!g}}PHu#%zuQJMZuZNpE&gR^I`JhHr zV`GwTj}9LE_~B25xJ?*7ntz$WC+A1jxN(=4oOQ&;WCPfO={TYm8X&y&*5D9Bt~lqX;0E8$guO`cTFtg}hW9+&@Vfr!tJUS}hSaCaNXO-EL>>G_b%LO)Z zMho*$QP4C3zs^=e^`X9q0whZxLDU4Cq5*AKw>WJs2Jz(#UOXuEXF_$4docTCX0&=aK;}So!MEB&;OL*(8Wu3-1uYF*Sp{KqnHh#7c`!1tXJVLnb>V$ zZG;4T{!vmuMFrmx8h9Bv>-%)y`I9X!*W@!iDQB8TmetJsHI6IJIb?)c-vkS_%QvIx zqVG7{WnLc{jhb_DdPW+2Xo<<$vJF1SY1aPw!@?gZhi*2zw|YvwW9LUo zfHBL#3~lk#n{%<7U-1|Ew4Zq1C}+${I&Z|JS(?!tX>VpTac59>xT-a&Bh@FwS?PCH zO9uq&a}1H6PPL>=dwmAK0O0|1bK?4H*m!DeoD09_EOYj*f{(wj5#5`#lwfL}wA-js z+2;fuj-z3JUyX7w|KK#RNKrk!9cX3vKAdl$Ep zvH*CGBl#m~dpTXA+!7ESs1lHh15=|ca`Mcgx1k3<3VhYUOz8&x0RR*hh8;nIHI#rG zogEhN3#^LL0cHUWL#Ueo9M=!YXXW)S=F2L%{1%{INI8O&Ufxzm1!@n$$VIIJYY(V^ zRMAO+Y=|g51})$SF-4bqEJMIcxd{%$^+cDGkvb%)Aq7S$?ZET?f2P0d(9;+^+O2%0 zlMZdcH^9Qd2!wP&SV#LH?vz?7pkh!n0^Zoji?N9th}D({D@Xx=09Cp#e!5G+IDwet zB9TKP5VT~_GYd?DA`kgVLnB+b|8VXWxrcKi_se+Qdp-1Ckdj}2-F@=e^dy~9{y^%^ zOsb<+h^ZnyOrn!Sh9C~70fic>`UzdWfG_`V$8p>SWo*yN+hz&c*v{W>&FE_>5+MSV z(R$^&<&%`r3YEpBj(<1k`1B2Z%KCPP?i$%kLn_D5AE24mVacQ>#2XvBh2n{;)&r)- z21;`F%q%02^^Y{1eiq_4jlUPf(_DDGcW3*wQ{9=mzV|<89?|-6n$&-Zmn@5zhnG{` zEbEjtR@(O9(fYd4cg}vxm($ZRQM^Z2OwPRj@T3S);xkgL`dy=E1xZiP^G8x*W^4Ku z0{omcDxR4?!+v$+uH8~5D@+pwG$E@HOxfZlwV9lu@rBqabZt{JwP_f1N|SY&g482N zp>1J(t)Z^d&t7RmJKvYy?LLcf!A(lyf>(VKBQC2C^&9NlG1N9{67X85H^WYggqvdv z@oeVsih?24kE8>I{I5^V+S{85VGa3F^?4LYUPH&S>PxRsFPlZ@>_2x^icrJ!rEoZZ z-F@?GCFxWRI{sIO`A|xnh8BE2u#-7TB)nhbkaZj0_SnMP_HIeoU?FHw`TJDx`LH?& zJ9qUpwY9W#)~DgBY+@dCpi$uKk5N(Mt53#rKJ)%6`NIcD|%TX#AyRsi6yCft4=gH76W4D2Ly{xm6jW95wv=HeUGGAndQrR+cgJ83e z9K|O`r$+4-(j17@pQiZ*76GL|h;u~=fL#p#?S>ju5sY2XEiykcTAn=4cLk`HspD`( zfGp_wS=6c!1pgM?3!=-~t?C2tzpTAtCNc&WqJ%Y)Gz09*tjp55f&e|58wNU6+^WON z1w(`}0Ku^cRc8+@#fOqxG%u>u3NGW$V+4o%aX$f}%CsN@2x=YF9YKqDtcjuz)gKu5 zI7|7z9(oQiU#5}4v_k2G+K4F~UK>xD!^}Q2wQPGyP8&`iI@TGPN@?!#JZ7c`6hLq- zB-YS)E`T&uz_i6PO~UUjQYI=C17CpBNHszvDv?GjN*$y5boJzl z8n2`_w&TdX*@QaC)aj`0b z#t#!Kl1m4OV{q@MmFC!B&e6ri_)w$w2)SHV+Dr(MF zIujj3KVk$dUOLOak--CM&^RIIG9Uoa1+h7s)&ENMdq zinW|{vH#zw7pM`qhZw^sz&uDFkkLju>Xoez=nU`M4wHGzr2;PxrSC5pCGU=CV;Bla zkt?+r^_3B^F|+Xk4G zGOZN}A3&ZY3;Hm`MV^?*g677_Bn#=>;0GK{%&*;bk4i#P12mi_t=G%={G=;I-BQjn zS}Q^pv#WjN>s8HkDg<6=*elK6b>*0n&R+6djk4P8T-<2yhQse%Vt)(`#Fo6(v4=%d zrbQQUk%e6uKAW~5fSdmka{7HEO-f7}%Hv*#>N$>I5uvu@8hn6&dXy}qJrWWUJmee5Tm0#pmrG4aX@>Q_$i=^MC3(w^d1#%X?X4OcG5moy}8Y?>$INlY%$JZAK%YW68*$`&S$0Uo!IJ%@ZVxTXHM#tQ_-*k$mSE1}uC3NL7XOuP zG%9576077FJoM@0OnHgv$Ev4`Km*1EV*41j0maNE<0ZPRWN2y0Ke;9y#UrFYV-t$(@K!3yM95c8m^Emj2Z~WD4B?P2VY+HX9+`NgP3) z@lhA1%01mr>D_Fh1Jms!R-uz3(FRfO95?~WfmmT*W^7=P zDef~Yw}*|2;^A@9E!A%uDnn0ziTbwLq*O=f+yJ?-0ucQYct%p%`O*3GgIM$qJT={_>C{kIY)_BNuVK-lWo>jUs07~@R5ps1ijd+>jFDTCrcjUE$i>$!x^uz|Jh?=uM+qynjhv=hiw|g)MMNL^1 zHw!>3Q>gyzI)M*lfW8Vom|%?u&bORCqMAUzA!b^Xqqn9#qnd?CL3jw1<+)?7kBD*v z_p**D7c$7kz3iZ80bx5jMOA)Ybr|%TT=rT*odwzlZ1+ciVU!n8S)_aFwECMDy-7tj zm|h@lgpqNjJDMt5lJr=@FUfeA zZZoY|YM0p~KanUztLEtddV%~q9}bd3JYQu73VovBRY&B@m5wUmC0}HTeHZ0FEoqN6 zif_s7yft4(a~t6?YUZz7C;s{N;8kcw%~1s2lu4=K&_%jPF?=Sg8eh@QF{@0&g@3hR z?O`Xg^`e|7SY;~Nn%8lH89;=cj$Ek_nVeLay;r0ck(snT5r{I*>7pub?WQ$ox^%)T z$?r*XjfJ(=IbCoW4gPP6>6fh=B-c6{gi-u8ngq$M>)02L3TzLMO1rugm7%O@>^^^) 
zHSM4L0vmN5cme$?!;?$4jLON)Bghm#6KmHD02UW=?=0vkMu`D8upL^igqp+0z4o#YNhJdKqICv;(CF zbVH=Hk;n{}3K!{W(KpACQ)E68f#%x-V6{E6*&)c-b#bsX(r4d0`I(cG^D?mGXs-=| zWNdZv%FD}l3(NmClQGREp~^Wym|n?J%s|C)A!)&&Qgp|qrj<3DMN(JT+sdpGjzo5} znjYt!824C6Gtqm69yl8K0#FKv7e+}zdd}7>Bvx)V5rH4CUVQUR$S})l-pIcwxQn~JZ(UIB8 zTz32{?kwX*xjc%0&h=5IWEOz!NC-fG_01S&CuBS8@c!>r@3Fv7B}g&~7epOVdCV09 zKyCoY7*De&e-V=)<#@=&OP`L7vE&5GFV=Uoy25n47|B#Wj}vPBwcf&O3<5Jj&@5RT$XxOp z#1rEy$`cEzL)#$sVz7&haF$~yrOZ|4=C#WI_!U&aZ11N*j`lJG*E>@bePsQwmunVj zqMEBzjV|bxbB?FYWP(jGWPj!&XQ5#7)$&<(Cw9D4Rdw2}jU$tdB;}&y1*O;JME}@0 z;o{_cijn@e0Hc9b*<@`z!lLS;E zgYfl=2ZslmDW83^&|~k48Ilscb>?ELu1z-Gbxdh^`AaB`D%)HsyN z38Omh+m>IzJ`b)vjc8?VwBjREx>dZIDP;`78!pSftFWe4h^4bmabG^1xlPyRw)f2r zO?LJzSi5V--k?^m^w0h1zGWofa{!odE{skrn0{wrW=+0R=$BnR-L5u|sfE|O_xsq= zZR@RxiWs;pgjY-Pnb;N#HJHNNDLb~-K1LL2*wC{4g){#uRH>Q6bXPwCG6SvquRNm^ zy%=ASVh|u8t6yaU7#5`DXx~^q{!n#|@&tf-YwQaUaw3$Upqe3 zQiagAN!+wsn{Zd#_)rB)O1Ss$j=`$bjCZkGW-G3=y;RZ(f*Ne9oQ(weQx+rDzBmh+ zXE7NkJFabSGih3HklNw6J{-L~2sdeB;IHRW>Sz$(?H6rMD-b&${1{nMWs3W8Bz~Jq zmwrv_$K=3U6^yei{s^9IiE`4p0atSo|LxhyxDQQEj2JAM;SpvUuNNY4&Q?(@D3o%( z-I-2Ye6NyAlK=P_$Jf5W@%{*0GUlkK|I&ZRCHP*%IWD%pKg%5|X443i< zE}7uRCs4MlV-$6}KbPiQe;;VO22FKHfnI+1DVNGIzdDHJ2$PvTS~IV@l~P46_Fq%< zGwK!N#*_5qV%{`T9!*0Ii%5Gkt`(hjzfyUX;Lnk*>ce>{C0#k|%b8oW<*HK7CEnp@ zeMXieCN$a`#=`~AeEjd#?TZ6%3)S{S77}6yn7q0Bg}3vq};|_}y?npo#%iG6Vz*GF&?sUs?tLR|~OGtPdEM zGUB-;qjWF7ln;GFJf?sA62$;PMgWxjHTlO7as&|p_*p{MReCT+I#Bd*CI(_f*5jZ{ zkS>I8F1{N@FX}alk%jT(b^&w>faD>j#pceY@5nESU!q&d5akMLq97-^nxJ#*@4cb6 z&||dqM}-_)U8gGm$ktXBDwp8VN(d_fRuo!yva3!?tXznI6wnQT5W4gvn)7ebv68+29EyEidi1A2 zI!YykbD94yKC7`yjoAuzA&~9m&o`{M0UC^)-QC?SW5e1AkTEyP6&3Oixkm>Hj9@`! za`noSl&vlCktIJ_3x>K)A+PN(!8GZgDC`|Pdxs?>s#nt_LTlYo@iKU~i`Z1|g(Z2a zlT~{b@6h3Ji^)0u@b!(!I-z$z`ir&6mZn**$W}LgiHq;GnQ$?7WSWa;}A5*$J=tOTFXK>W6l7 zOI^heX7?_?dG*(_UW}c)YH;M7LD)WtHE+CZA(habST|Kv;G{bFDLnXlROc1!qh$u! 
z{O$epnA~=o&ENe6chUdfsiuR{-M3;A*zc}0aU0rn9wJX~PG zJtdX9gYg$NWfv)oHNmh0XV@-Ax2X6`R*VBWN;%s%-m5k|cy*<~XQQPLT@L1?N6Z7^ z`H9dctFdU~F$UKfrf1>bLXdL{Uowte(VcQ0f@*+GyhA9p1r=|7n+|#))*$#AKts^x zGLF_dScZT)p|2305b%6;dhx9|FEu&17iuYHsMZauSSr{K2NNMLA8m7qa)CN%CxjtV zR1pOR;OAoJqSYa?O`JN&<8d!0*L~(N%Ms z9)FwVb=EWE`4h5O?`pL*?k!2>p*CeGOlybz0rF(|Nev2Sk)5UhPGtC2>i!0@z9XS6 zLY>YXDWM`lji+WbUo$u~H0jW&S1#DWCX4wq)S8LQ=KlI_Q2kFG?%v!PxrEbUc-KkD z2W~$+c!;?FrUs0Zwj93niXUUP`Xcv2+Nk|F!N*}JxGF9o3q*hSy8mOy!}cT(@AlHBd=bWvLJNJwFP- zD+vYBLwVrC`c~aJ#&G|j(a^fP-SpX`yv8#MiLR1-?g+K(jrsGM-HlU2 ziofVeqU6!z2XTK>ppz~rpzO#fhFnC|z}MYJxwfUFNa0n>*Q1<8w%_NqRD3p#`-lgh zo;GdGfv{+rd|Mgy+b|1-c`F=xz79Sl)NL+{TynjHDBBJBnV z(TT|qH`$TfKkavie;B(+d#3nS)f8-osfw;3Ee1-**O?!~a?RDkZ+{_%f4RND@cZY6 zzQzLK7mR62G9B_?-sC9mO!*)tG<*p3VGYk9qnNn(w)yWp5kUgsN4aic*)rshx6SjH-k1z> zvA1R_9jP;h@nrWGEt3INOiwd#1J*jbs=d>0jsm95j029YjLB22XL3Rej;303bOxaN zGmZ!hZ2gd0Pw&+|M|7d2tgP3Lj^;-~tgPkD0Xs$)5)BU*Qg>ihH9yCxRUo-Y(7BI4 zv|vcNi89}oK_f?(Rfg(d~wBhG-fP5!2D+hJI!h9trm@Eku+RmSOuo7^Epaj+Z_1m>FSh(w zKfez$)ZN>833;w`-krxNJY^JHI=^ulZ{3qJF}Br(UxIr%I=cLxD^1AaBUGH|uT8{l zBv5XL54W%&L#Dto#S$vtM;gSN<)G!BS>+{^MfQ+ABs%S|KQsUW+!uCiZNf{+5T1Re!sQpnT)65m@C{q$fyBOdMqzeNQm{ znxgOURe{OO;Bup@>mxP*vKb`J$?ZOKH3WfHU)~e=n_UGCM(HlBpK&Z@);B!CXo+xr zz$dM`58Rsb$#FZg-rduw!o)G^!sGkg2mNbBJ_vHxnrBiy#x*kpIRY#;lPaQ=v~&DNLyzA(D>nBQg!{9?rL!NV$p5EPXD%l(tK zY>e~GtwM`QiRWZnbO;uxX4&WZ{={cC(_H-8fV!mXeeR6u8y=8#rbV9E z16N(@V9yhbl!P54fiScs`*{Q*qcQB%vw_G%5RJ{}L9*S)QZ8 zvgJzWfp*UsRh|OovpaY0m`-W&?Vx3Az_5L?=taYwzbUL51i^X;xOe>BRV03UdR1}% zWyW&G{pg3YO)Rx!U1LT;9NWw0fIp{;2XEdc0=b0#!y-rFKEJZf={%_=I~p_I4@Gwb zE+;Sw?@zA?+ww2nyS6RC3L<2(ybpF~7b0G;*VmwB1;hlwU_PuESTGS`_CIj12w9h8 z$;=9`GaTRX>5>2sTUHf*;*G^49u*Fy$a!>ZOqJ=pRs+^;O4MjspE-!S` z_KidG^L7lQy6Z)3#`Ekp^X+tgp=6HwWh}TA<%;~Z2_-j~xq|LS?UA*^FY8m2o!zEHMSR>5Iq{y1A@;v*r|;twEC@sFY$ZlVXGQ=K zC%uV#!PA7Wzd&V`&uuMC)c#5fE#g%oaiKNoKsnFUZ@wVvgTs)y_5L4s;p*9fE)Ic% z!WpF#4swAnZ#yF{FFyWOAtb z%D5;3X(nBDfSs)d1H6pAdmBP}rxVL$_YbCbDwkfH%?~v|L z+u!AYSQ_>YYB8QDl`;?EM+8JmMwOt-znHEkkG6FmEPnBGRQ~6e!}31m=kCZCWV0m$ zrIs!gURGWPN-@h2$*M-YQYlidG2swbA#Jv%YK$rf6$Ip8zdqZ_G0gtaoRku2=}%V-OrwlmABECz4(u;B zyD$E!UJUT4PB*SR{eRIHV+z(fNJQ9i4tK8cV77=hcl?2pgohI`+&*N%H)BeA9(Hln(D3^x;hL02hC=ac^V%$ePbK5q8ca zNv7Ki*x!ix7)UfYJ4R+qf5I_NQy$c~-EDO)X#lvbje{D$H2Ubv>wNgIHg}dGJt}!I zkIx^|y~+=$V-yX|2(wuz*T<2R0)sBJ=7M#a>H1THAw4H#)Z{aci95A(6r7h?B^N#$ z-TOg4`n^HiZdb{>q1LXVy7T3eAB;TD2#Is90#4E~ieBU0<1OnBlYHlBuJ<{m0jP=5 zN3Qqf2N)UOON36Cv-iD{H)+<%O87Us3S$0bLUoOCaVKSQAVYRc7p7@)=)ov-5ztQur zgF{z(1lJeptYsJGmj_eTX1(@!-g{!@kgKVOlh|7?67`kIygxN^2&a^TYa}=v2=oy8 z?ZP5*ey-N2KpUBSFwqzb-hXc%wQ=;fx0N=>+-<{wB^~C(R%$1~dZhe(i^I&tzbdM> zj!fDmB5A@@LV{D*C$u_=#ftxFj0A=0poim%wx6XT4Go$0J(IUWJPyjoq|k2>oRRJR zd7Po*2kJ(ojGslG0Xpi=?Q`?lBKMWxnZ3`639s-4ldxuRj7-~== zC#JpcLEA};LcvU%-$}wQQsAc2b|m}!{G1w(RZvh+M8tq4VQYW+_wS$W0X_i%0YzGV zd$Wy5nIH$QrlzJSYEdnEf(mnZ#~&HFCxk$l?B|S(fV1sMq_$so6ek}}%79=8VLnF6 zfNuF#wZ-=nSH=5D)KEQaN;s$q@pOImI_{;M>_C>Rn!=)nZ2i-Qwtk|zdQbS+xZerj8GHT+R6lg>E)tB0YEb!HCKln z^d%Q$Zl`%nKK?CW9wVs5=k4cIU8|{eOu6p+tIFSEVj}j2v3wldv87tMdPZ|Ky&M5t zDY`7o9Fssw55HJtiY@EsT!ZvtK+Za-c-`9mQV&m0W+w3$yPvx}Pm}`gA*yU>PHQr0 z@8nu(AG<;kdTb9yE>e_k#B#5A^Aw=0k#NtLVN}F-Tv=y%DlKfpA`d{; zA}e=i{%mG`Vry--7Zo)!&}q}1m#*cS3iyV=kGZk#DPma)BJdJfeUsYXJC2-5ufe4Z z`RmC4e&gfJn!38W#zr+&RpO6w$h#ik-Me?}%0oj#NX@4*9rl$!f9B`suP+XAva+%g z6Lo4|w0iHMRa975SR0!(0bOSm5~PhS z(L~Y$iIE6A*>C@&t8`|yNsF&e$}6H403ybJdvK8A)M9jH?)EY5;lb>H_0_z1gYP!J z{t58ZIbi74;l$B&F+z>4u)LD@LO~Je;3lw@v z5gPYLFIsfU^bWJ`Qs5I{06`gdYFe`ci{UCCXx z7keG;MvRw`7bBCh>RSYapvHN3tm#B5A)-XE+uc9ZxK+i 
zsHmuN(b&KM2QTmF&!4qVK(pV!KcT0m=jI;%`Sa)4m=-Kws}u(d3xuQt%r7i(adEMM zuD5!AyG6Oa-+WHU@;;SPcx$wKCZdD3X@8TfQ#Zl}em9x+y-b`q;L@uh;(9*WygIoO zo1?e=ay+>cx?^^JMAa+pA7_J9sQn+ql6@_}PJ$px*1#P4P*Fk25|O&Mv+z@l!|J{| zq7${qm}$XLNz_vJ`f6)jLn#XRMvDGBQWp8J^!Rnuqid3;S+es13+ z!0qiVxIf`Q^rwpXhiNC4ZtLPn2;GdO)qrHlA1Vh?>B?V~Y0>Z5)Rl+hDVC5b@#G&n zDxTqy*(LTw%R+27=gvD2*14FPpgu&($UPy&fhB_ElI+)`T_Jb#Y0&x42b@zv0<2zu zBo2%Uu2hrQ<;{A|4s}{4UUvPXPL;xtLwx5A+Jmk|2mObXj2<_zhP>GzBqT&`0+&(4 zCXx;$|4K$i#>gmLmBquyCr8BnjrzX_$-fu(+Af4Y0g|neM7Zc^wG#C#RFvn){ca4V zlL|CwsWDGrQ3I@OZ1jjpNJ#c(>NkGXTH$ysf40)uVvC(n~$l#!&M(2R*+DqQ+y415U_*%BrJY>G+lce12y z*LeG;%wh6lV`GO(k`(qIwT6W2OP8W1+A@xgx^BOMTW5{{NSI*+;@oC?HPvlgJ$Z1X zDL~R_M*;K5kX;)X-{S|!@kAW1g1f3qd1%R4d6Fw(w7#w1O&hk z6iNB9yOMi1^)%|CkP~AOg$0}=d|vy{)F<9MUfKA<(Xnx|r7}Q+x`_m2_=Ns3(tsa7I|vSWZ=s3# zN^YVfCsykE<-SLbUry)he%44*lK>ULUU{4Jx9S!m{qcc_)9j0PuY>#v1M@L_oW3u1 z`164+ue&6)FI)<2l4-@N$I_zZZINvus+Fb912DSnyW0eF_!j&`_D%6OQ5n+t4qXnH z14W)pEs?JdwTH83CSE3U8#z=~9f_4f{GT#^pmG6H2)H3X874yEC5SA?hAY2_dayV@ za^?~9d-;#8!l$2Tqyzh(EM1Nl4ReWi<0i7-&-h3b1g8XBM18tjqS?DOJx~g1VRcd) z;Gsm3J}Z&f0$`vL9NAt;FC1Bw|+a0tDww8h1aswgfFdNycE$TRj|c-(#T z=n+!qG$SP?{Dn<> z)oSVKggZ&;;}IF@w49R2dbuCR=#6Dh#f7U44lY*+4qjjvsV#|LI`oU3FL0Ib&{sQk!mslhe6}Pl-2A= z^NXP|Q?h8Gm0e+rsi>n)BlJ%=XiB!ioJMU7&n=bS%!31>>4b7aN^BT9_MfyrSvk(O z9?)|dfirG)GCA892DtJ>5%U380iKn>F(~1Z#?I`@AqC@ z7yo7$_A~e1Yp;8)b$61neV7l3?n*pq^WMH(onZAcsn(xBel7Ud%cbNl;OU~#sxCTC zQ-zm=$Fz4eXatRz0jsU`_{deNq~KfDI(JOMhTOr@T~Mn*z}ZIOSXOmsD%rR)7W4p3 zI_57c-#XW?4){!opE�R9lQ~DLK0nCHuyjaa@HR%i_-ouwDk=eZPrMIxZ)>j&Aim z^dK^drV-oWL-yJUb+n#0>r<$WXA4G?{>q)flWP><1awzUSZI&aOmUS=tIo=JN<;rzWFtdVakxP3>+Z`hZI9bXgA-Yr}5F zLIRU64{Oc|`^x!F!O$~MaAAF7tfS8Zq0ulN<&azaX{1mjKnv0i07fuPCuKGW3OK&2 zcO5PYKcC%gVKXnh;$5a)mRL5}OOp}{lvz6nRerH7kwLQj>{#r1`3u*AT@hY%S~`R-Lg@e*5X6>{*r&{4jYRLwGM1y0-qtht zXDVDA5ZdyKWde&aE;#12Dc%FdvPaKpDX1G90;>=M5Sh53cx)qiIPx+Ij`~HU6%www zsY6$sG!Q-{CAg)R^BZn2hSrREkCyLiAyy>loD3a=5<_=uILq{0HmCU&y8#;C1Gonk zdH;9#7dCN`ISVE?AI?3(z9XuSowP%X*C1W}nY;2QOquG^XY` z=z?YDlCw*i6HG)W!%xdWztRt14-eI+9Xuy)@>(}>I`0E@&}cNRF}Pq>1KG_WCk@$oC#~J(wV9T!Abjec-FcR6I&7qkP~H) zE9bUIe|7ysBH$D=ZAJSQGws5{Z?3-Xud-0G0>#x#Vd8h%f+x#qCyCjW#OLGju1S(Z zGxiiPoeArSWSfh%G1gc60;T>YA@uIyPjqyH4+q#PTdLYX)Qj_}X*&J?fD0C(q0K^tdBlm29-g8oTBFFaS7d+)$hX z>8bbD2@6{!8z@^iA8Az1@rw@ zpVhz}ov0$%~3Mf+6G2yAxeSqPkiOQGfEvf|FbN zjYF)vMif=xQF6-|pKlXFh@KhK#i!nd{Xnw2RAca5Pn8=oV`JhYh1*N+baH?gLa>%3Psx5rQCEXmVLm&XmtE-q$fWyKPc zm;aN@koj76Z)b#z5)+{S49~2&TShjzK4h%i*_Ehj z!_B~(8^)FaY0ar04fD!{EWh#>I#;x(nbIv=%uyhK^#)Y+^ZsR5*h{0i8#2PMCM%zt z-|v~Q7u}0OJx#iZ4K3%R?e4I(e@o0h?)-^DSUKuU)W3FU`VewI#0r| zz18QB8dMu?TzS(BK4%fK$r7(>R^N2EED4twg<4=2B|JW?I>cz&BzycB46L8`JEFgL zy3QWM0F3F@&)ZoX8s#WSTc(`tEJ}J{SKT#3(++%&Uu^!njbNPMapJwnSiG@|P#q53 z)gh;L>Xdl1`@LhS1`kYL=z(LLXw!7*n{$G$x^C;D%}4Dw5%90#NU1;~h#$Zc(rkuj zeNBLa#9XCb;>^~0@&Pw{@$y{RRDuuy=FmFq?bERue=M*0G={Dt6|c+p57>j%ooRG%NWh*%r(}Zp-B)g7(~nk zjy;V8ph7=U1BL3njK+C7%^O2UNwyd zF$gM*y*iEkL}I%4@K;BiH?auEbFcJId$sUQtF`xm=KGK9bN1RohLTp9I_7N3EYS_0 zfUWvV>idfpUZEBmjc%TF|Z979vqZ-75jHJcIYtywh6~s z#yx({zCgNysRzc<*CuEhBNYSHBlPL?%S3zngO?HVC0pO{juAzMND9!edH}QI6Qhy? zt&mwj4jcf}qw4W>XHpg{+aDpnGx@~IP5>NZ9OEJi4BCukSzvt|r`a*}Z%M^TCn7{2 zUAMYzT5q~fU-I+c)N*Hn3=IvH4kHs1RBsyV>!-cDKhevk{G04)MP^l{6mQUD)ApcZ z@u7DufKyW?U=0+`napzJ2aMcYrk&kAp>UmT+ECu@JYYY5Vuhw;Ne(4mrHg!37^)Fo z8hNmvQIyczN_j8! 
z%^{g+^YN;S_3gLlY?de9E1!VVq19Gh%N@6KhpzD#pG^~QrP|CimE!X$yG;h!=9-ER zE`~+S=7L^4T-{*3l^xxryn9^0t~H$VKGAy!81$BZ>M6{2Nw?fSq4$f_I0)dycV(?x+MzGF#ya6Y@HBltRH;4Q${JvNrW^LP_|xZl1NleVB{u-il7 zV{tcfB~i?D_<5p*Co5E`CeDH+d1eLO7drf-sU3S081pT z1+AriE>K4%F_sN`5ogg2VNRi3c>lBBeoe{%lb6-4TV@lh`%BiB3IK=ldH<@0vHk{D zLtw{vUqY1vl0ttx1U_ zSibFrip2)M^Lf2@8h2M@9R0r9Ejml4?ZBF_R&a{nb&9_=615-Segzin60r2CE=7qI z+n}NG1QH5==f*VvTHsRf5R6WYjvZL`oeZC}Q1`g7#Vl~%T`<(PzFhpzfrY%xMul|@ zVYC79T{A(JsHm=KE|A{?khi+_I-3Rnm~KjbOFER$)R19(r?kTzBvu9QMPpLsi zVr$?UdhExvI1r^fEfK>1ER5p8R#Aml-y1Cf%+)gpIL)B%vPU<{2 zeLm};5zi{m-wr`+4{1(Jr9Ty4*=U~kf0MSCPj-2{evH$ZGdbi2qOA?wJ%!xXx76Ez z{!L&_5V6ym4|oBz^g@@o*+r z8@>^y`uWhVs+Bb7alN6QX17<>=W(S?0@uXyHS*r1&25zh;pcsa*8KZnZcO#I-{bVl zR~&Y8_~4`5%q$^?$%#DU7SsEUnwDFaB?rA%2&pag3F!Oc?L`9hAJ#f6HWmC~4+Ay( zPyG*tfkr#+Xw(rXI?Hp_P7VVP8XY$UNgeO@Kmh?`JoNLA9-ncBahr`54@STuKWnyDkR_eyiO z(#W%D8=Q0c&b^sBV6dO9C6tkUk`9)xu}d%U&ifmd-XqhNs{j^B1G$J) zDsGWp*=s5CTwSFG5y)&2tEUIWXd2yLnm1#mQVt@9#~~2B(8Q-lowoHOf9uX)-~B^y z#3?BwoB*!<$9KWVr-hozb*EdaAAIBCT+3D&+_n_cU$#CS{1qf|E%n#phj%tkYFbH? z54Jud25F(xLT&)4ki&;{Q<(`dL`?|cJcEnw>nH&zKwE;i@$Fg8v-PVL2wnX^&M zrjrf8OcBHg4?Bnt#0;&S_*!pC)GrT&!F=J-O(Pc6wQgRT+1KWJ06VD~mLp(q*`9jj zh(ipn27Q_1D7*j)d~e`T_yC+zFVE?YRt(Dz@&tMXm&%~XKq6lL2DwmSEX>pq#B^By ze1afo9$=dQGzC)l@bLi2)PfB3?|&zJa{>ndDnRPpTO}OvK!f@$h((?D-xIO^l^A5N z?+@yk1Mt?tw8Xi}3!ze*hpn+>1Uf42Wz4^4?IF7Ds>YOH%kzo%M04-avGW^USeI%}PcuN;o<0LMi>!>? zE6tL#I|Vw3(QXuR$*x)p&1<(I!gEr!{sE3-e*uqpPLoLRKTT}DN$}oeexq7S7 zQS8XYN%iqY@Ugz|22b~=0{Sed&@jN&Ad)VZ*wLtpz>7=E(65Dz< ztAM&me(dnVg-)`_#p`6x8&eGcXW|2ckw3bwoBC5;l2L18W8s7x6*qQSoJrmq*`!u6 zbSYKHPrC0~EL1lcvkv5mULtm_-JUQc)wH&Yo9~=|GnxNAWE_Pm#AQSS(^X|*6**R2 zHvt%ir9w3yfX^uIU>mRq+;{hqLzPIyLVdlaPDUw*MexEKG;uyUqXfORXww;RqM3WI zFi$~Q6fOoD?Wc3na#Gep5%)cUHSmy~!PxZ#*at|9Nicr;wqTv-v&O;zR}?jiaT@5% zQQ1R9A-y~TLcOD+1djRHr*Cx`%ZaY5ccaH98IjvzDsb98LQz1B5TJTyiY%lq0aQOU z6K3+1u@uZq)iELD^5JlN_*3{sx{ZOl=?saORZs6@1F+0|jJb?}g`Vn8b<9*K+Ym!A zJxmqg*Q=F%1s(Zej<~d6j>JmA`7&shvG75fzO>va?|#EROma8;2RR|=Qg{MB6I0r) z{wANl5%uxq1?J&N$L|#(pj_XOwy~0-e6bhz+L>I`OW|o#1kAZbx~~&!9#pU#7d+R` zp?&+fp>FWRd2wfTWcYM8l(%#++~~Y1bfz*lN+qG1$GNmGKylo^ExY(yo&|k`SrtC? zw$KXP!HF>FQQUk1p3M=YM}zbLhr@)c;qXzR)5?V zj*R2o)C5KbP5vF(iWRtiHz4p_pdq9SSH@8by>35dN3uXJfs&*2CZLp9X#Sm%xsD~< zB$cg;T{JPmbs~AfU-agE8Z~6LT5MBpM)r^uERm?LPp_<4>I1YeJ#*b`pcJ{$d<^`% zDBW8=Yp@I}EzNak!j4_^Rvda|elmExDBGuN-O{T}rfy{{?mqaq@5czN6SK%bQbPl-Nk+jBmYcY+9Wg=Z zsfx8D1gHm50GQ=7bQgTf=$|2ja2w}2vqX&sEOMH=E%BTi_*bf0^Oo0$2&8=^lZCBInqRymRJIee-;;X83)}5An&$NS# zz3t}q&?eC2>~Q~^P4Wi)WdNHQZ`AY|ZPfWT4!>S;Ig1Ui2)~i>SllMsaoF3b1lZHATGqO$V0}9N5R{+2iZ={&qwTCc^*D&dPtNCezkH_ zCH222?1%F~Xo^Yf3F{m%hNOZ(qtNn}L2e>b^k$$M{04YFN*;*h4oRHy3I~_?ril@j z=QJ`pBWTnRUppAdG=g-EhZy)i!uglE;1C(WMDKgY#Uk0=1uJL~atnfjghvU!AZTKY z{fNo;h+GGfdZcSatW2^PLQ_K*?)D-Q#Si#USZmIM2oeLOAfdq!id4;%Ju4K*2n1M! 
z)(WKT8R&u80X~dgUmX}$IID%xze?HtS`L~9u0byVYOMV>)NSNY5Ge>PgK1eG@0Sm% z8rEl!3cv{v;Gk6_NrJb`2M~Vp6cDbjTE_G8oZZ{bHoyS3DQ544&JzJmL+KPl#lei# zAj+!s3EfXeugwKReNBA5re5%qgXlmja4;2KFF33C-nBO=*|`c%p!$>$bg`tKqep;RA_K+4QMO z+k?JzXi)wgbT1&q&F(UuOcG&^>}Y2W-M;GUzw&U0+*Yo>T;#4^D+-ytIJ!q%8^U9i z+VY_z^}HS@0f{4ySU79;5_N5?pW5lX9u6W0<C0zqCa&eI|r#3S)z@9U&~KB?GE=3o@+S?2vjiN02}88qN>E z+I*F#D4(lmz6!8o*Ml69(!k=dUsE@V7CJy1 z1Xqs_2X!Awa$er!4UkM_plJu!Tb80iyEVV25*5c2PYKwkD-c6NDPWFzB?kZX6CoN(zK8oyi`Q>Cu9GQx~tMkX5eDszl+IA@wI__tw3@ z%!L3fH}@4^Ay&)%CL7@m4MobuFQt5KW`}}J!ITH0rfKQ}qW7 zF(wr5O$Wv1l8ZR&9ITfN@K@ZP7tmlj2K}S2O2Tj+nE>+_5x$UGytLh42EN#f!&&lk#?Fk%^Jq%@Qd&tdZ zT>bIK>IU)ttVu=cPcufnS!MLxO!s>iay&XM2VWXDCdx!+FJd@2!8Mp-{dy4s;%N}Cdys+0ML zl#pfq_Js&QCt5OYdnz6|yn4=qF51{~NLuDDJ#p$^P<%sq_u5o0q%S4vMnL7_uP5IWp2f2 zu*b=Rql{l-2?FN~M5xHJ;OM5odMq`QvrD0j8g&m>*C7&%gQNSVoySz?rG`ilptnkq z*}>tIpQBM?KHUmd`^vjU52wFM;)S4AH9?@;zDGsY`=-1hG!B)s4qp!_e*e~8d>&RKEX4>ufDZU9=QR@i=VB$^+xGM3G%HiWx`jLS1`$%zUmbh&LHWL0 zDWSn>CA$Q@h{=~tO6_zukQRYbS#-q;*-TE*A%sgRKDQ$Vv-esj{sRvN;>n`camBJ) z55KP`l`ofgtL9pFNi)^1J|#E-wzezN-zRIw{a2Rv)1YLQz0at#N$G04w%=NH-e33- z4$nq)fpE-A_tmcD9S6ra79`K@XDfcCAYae1R&b0O*=Wbo9r8@-%y?oFZBo9(a7@y+ zk?YAi5>*S2pkom}8WkAzyZ1Q8O)roC+Wy8UQKg9U*(+L}e0IUX35gRTV%sa$;rGS>T2;RKo@w3vnlhn%U3wu=jRi&cIi?9_ zWUbB2w}w`s_ajP=U6tt?Ouw=hGPdrJ(UzaD^>Ahx(<~)s=uM|+`y4cy7IQ04Sg#(l z?#Id1>ToZq)AaOeV5QH?cB30wX@l~x%4BU7IUAXvPxJ8xn1Wb4f7^bP+b;QP*CBJ9 zMb`SbJ7)7d4jibvuuf-(dT-5A_I`mhQF?vJ z+@>jiJRIA7dRl7=p?N$?-11JYqkHi6`Rf=&vADp0@4Z8U@4jnNRem8>kPf~oixJC)cP-1^a&`>rmiA-o8Yg#WM;=c1u%)UZE2O{YN zd09cCZk>Z2>tms&qsV`+0EicUl;C`3^qW%1;G^7U9%cVdDaWno%HMToXP29{-&_9j zZ3YP6&dh#Fb^aSmvT4(rTt|LJ$I_xEjlqR2b#5`o(>4fvvGzI0Il}Ysa`O~Fttw-c zTc@_VTwQ%F*L{6=*BJbjwcg(S?C(;sh{{WI-jxJVfrIU{%SNy?b^i)e1eUPy#pIDa zAA5;z+T~dN;h*2uvH}KnUk@9Ubv-XNdFV08>Re|PIiA1K<#0rvTk^bUf*@;emHp%| ztY~^y%+~C@8_}iv6r1%GunlG2yosR0JS3;Z+d+R*y6yqI#uP4H=lD(hY5fN61-2@$ z4Vw*UP*Pj{?Q9sCn-Mv1j3fPOd1e1_X?lS!p_{=60F4Hk;pMiRy5trN7HM13cy-4y zh3CaXYfU$3YEj5w@hK&&StbAUqn)rYjCS(Q^0 z|LLva#+MY-N~WCzx|^R)f7~myKS(E;qXZw6hKkm0!a#cXHDn}29fZXqwn^>z3mF9Fu?F3wi#-g)pXBl0-+l|F2@%z7 z-e@%9Z5d_z*zI8X@lHNwZVo>&N2i7RWHG+tpOJy`h0r_7ux|@T*INk=r|73wMB9@_ z?omAptg=@&U8nCYuDVB3Z}vNLi=L8i#M*35Z7(iyc7tP4;D}*a%p+`ee36MJEfB9x zjj--MrqO_*#sfPN%pzv9yVC4NA_+h&#pe{x3iIJA^;2VS-km+cRCoPJN9zA%B6i0Bp+v?$*2eNd|IkpwwLiY+)IP4U5^pkX8P+@PXfP zIYR#L9Y%cjvtj=bi?M5ZU_Qg2NBk-gZSv!bWy3?P% zpcfTQbG#N`sjv0YnkxAN2xTuA(BLO{53XbcyKP;`Nt*L|@;N+KScY|)Eoa5ybwKz; zlAj0E9;|vecV~A&@k0lD3MNqN=}T|jPfhqg2_L<<8vE8&r6VM0*GEFB@I;irLYzH+ z?X$K;zljEJx=7<|DiTC(U?Z;5h)T`Qr;gh|F{078zJ8P`i2i6z7uQ z{t!R{@dl_m8!7<`k?_#{xZTbzw_F#_3-LRFHn{VyL z>jU=#^y;X3IdA;7k!XuucKhJKSS5$atvrqLa5K8|&EiV4HK-L==1&gu)s%dz$Y<}& z4*nOgfC83??;mW1NFF&b@V%J@-}iQW2WRw2up#lGer~%9RrOsI!+?X4;SunFi}{H# zco=DP@UF*C%C`@&uP{=zXUyi?-Tw!b|J!E2!=#rymfP3!RcJQ7y}f>K36?U3>R6?F zTP&6XrOc0`LH5$X`C2phykwKo)xy80UsUv-VJFc=F8QeCL{izeQfox4OrUvtM5JSJ ze>R-oTj=$p?VkWUVXdcG?e3&^92ChxoZjJEBoW?gY1jg1C(><4e*uhh-jGR2JjEOa z8Z~@IOn~Z3-j-StqiFTD-f8}!737*TQMN^wm@?(`0o_8j2%vJf>Q_X>m;V|4C577H zk)r0m9yfrinmd2g`z$<)TP02HtmC)O>9#B_VtYPaKDNEY#?;3T;+1(`Stx~J!lmH1 zxDd08hm}M2q2^#NRgMoX6j+uL`tg+^-2K)qW8YteIo5qZh*&0Gl8W%R={ouK^yWIz zy1A0pP@7q}<#XmUgzeM_7dy&h{E~wjbSV4s=Xs-oo9t}^^N)%tCCcLBhhD5w)rc1j zTKy1MA9t=v8KJ*)^6+NTXX=);V1oHx<9qRz!Qkux^>4W>zgkF?54&ijexLy4doRx3 zm389+dV@1W&guPi2li|AAdb(oi5T)R(^NJ@nQxUNy?4i)lsEh~QouBtwdVV4tXvEm zBtcz|EgGn-6vAn%y;Y*F<+8pU@-j)+jnYVTVU1M6wEQ)QaC4oK^uzu51hZqaeHJB? 
zv`-w8HKCVy30riuDUIqn>aZ#~rIAg9{7CJWWLk8rQdC`*?G@dhKT%oJW-m)u8kY)n-IT;=7ngC^MVQ%)Q2WUH&bWax+5wxrD8vd}K4JhUMl(Gw zui}&jKc`&;A5QXSaFuilGmELkCpZ?s%d9LjiVBWTXQ~R4ptvk7m%-LELIQ9Z#G6#H zruQ?H{?w+sny#lD(q{h9e0=^mu3lGSW}P=slY1IE(PWmKzYMt0KmVmlxc-VRXN&Aw z^CDiCpZO{f6Z<&&jil3J!$432R=EFzVs5-FcR1PTYHeF*_r0GCQAlx5(dLZ=YQ_5m zf?kOuO7V1-ZlUAcTyOz)^H{)46}*Z&56fnI^FK|i$XkRN#_{p~W`15Prr

j8N7 zqkVaKxz=j#_V$*?QS%={=~4O9IO4rqY3ZK))rRc_Pm2~deGsbN2DI(zQjQi0bDige~{AVh^?z-XZ;_qj`M@T0}6?D72f$xz+on)LULERJVzs^Q}u3VbvG3 zRh}zxnCZG^+gP{==={VJDyLSi6cM%h?HS?iPp2J1j zS;d$HXYOAqMEp&6Jkf3+nSSvf)NbzRaFzvt;={QL)u zq3Zdt9|Z;-;ZNn{*woAjx9cY@eiU`MWn80NY&H69BGY%sMF4@}T}wLe6YgZUCVIE_ zuCFd-D`vw2zAu@9q)e|$E4(+`LkF31j0;@8tWBkAO~=qHFdMWt-8ALU=Oq8idg)%n z*7sRX_9`qm7(==#QOmNA71eSG@~=!+Xi)&=nc4z9*NJFj3TFd9T4nT;1)sqbdS)%1!w%D0GrT{%c!*rk*YLH1y2Qi1FGhjWlP0Zu@#GL z=EQrg16q>d3?`R-D1KJtCQDe3+w0f%R~g60y|pEuu+v!90kMp}M%ThGosCGH869bs1_JvfnvC&t0rLF!d)@GoCQL zx%3BQKOO&4u!~4bs3=Y;yi`-I$y7IW%3Wk8UDOXp)A1qde>;D#mx_{7wbR~I5A*p< zyWtxnBO`=;42wb_RzKr~v2m3;)iu%kUSU_`s)o#DF;usdAjd79)`w8fF-<`?;GejmKJPjGhExvboWrHc>86$ELS%*-SxVRXn-v^^_G0@iuV`Wh4+u z9A37R-sBD+2MMag*6ra&Rtjf^ZglAblHVn+#$$cip4fea>h_07j%e}8a4qMI06ms9Aq9LsP}GiN z1Qkg98#2@)$c=v0gpYs$^kG^N3_NFHm-wH!G?ulsL^=wN7P2<$$lPNbVO&xs!B0H#cqEA zrxYorr)+rbI9w%s!D{*UEsxn|*FcP{%7mt=XQgU&-N6vCVnH)8dtlPfFET)N#j^RQ zTCG>8SXSNZIj@ZNz*-XSZcPnYU##kM{=zT6>kAc1>GzMzQZ{~Bg-7=_tK~PBb5;aG zBMoeKiRM0Ty5*Mt1i^m}Oa#A4JF#t}$y)SIu%N(MtaGc z!tQ4nEYT`$y6(@ZCXNajJwI%vwT)X>&YVt3#%g0{>pBpNB8)Nyl78(=FKE}8) zOBnH!91Oh(phKg6qQn2c17p`k1brUw4M-le`V8Nsiu$zbws~VN;3&|9uQ33PI(Fp3 z))6=gZAiE0F`}}-l-5H>Ej^|UWEFo)}(xtqZhtX+r zHr0F-y)4DWA>ph%{gY<|{gbv}TytIX$%a3> zEZEDBqW`@bc2euN(fNM%PP+GgXpL4g>vs%I7CX;V{y$oPfMjA@DsZ(mvF!$}J?7qv z<<2;r5eDWrZcwLhZ&;exmolAxf)?q;P$p~iG2!f8Ke|k5&g}Xaiq!U}47ntBHx3vX zl~onf9wufPrxG!MP&%4W6bk6iP!#r;nK-t~vfz#)f-X`R2u{pxgyQ-!6~ql->-tpi zi;A|r)KJdvXC0Mn5SU^Z=ABT@9~o&eGghCfEXXeJBwpD#Kz2g}!-t_z7R#F;nXo63 z97}$GZmsZK?-|VhRMNYPDRya)CCqwb?I2vAfFASNxjvWK-&oK0e?sMTGW5SS->1lH zF0Tzgj5T81yE{K(Wc8pex?u z@Nz3c!a@=6)J7bHh*bJ7&vaOY@S*!#gD*~&h7I2Yy{nFw5q59(SrIX+M?3vNv=8-OW5|CvpFQF;x%{#_ngbr&Q8l>d^`uP zyLM@ag88n#uwg@!5qA22pRfQsWt#0(uX1W~ah8euf{W!sh%G)FPLTS~1+b@Ip(i(6 zbg$v!YNZzKyO&dj>*gVU*MIjLJ{EJ7g~kNuEYFOM+56onGdmB!m|EkN7%T`D9MlwL z?4*LksYm*U*=pVy&dNFYB|;!oBtSs$69+Rnst+>aE70AqJ+#c^pBm6B_3~TkXU!Rx zQmtPG=yXtn0`VlW7$A)dX zRpn3}rzAq*lL8KU&sy2~S<1XVG=*vi8$9H|5;-Ay7TCUZh;M?zd+T9b9h~_>-D9gR zbK0!_LQKizVDD;rU56TM3n)wLZbTI4&u z2+-YkC}%}!jAjoA>Ydf_!;?v~W$o6Yks@ZrnnMA_rDqFbh z>9)(|x6S8ktI1^RQ1!CE_=Yb=Sk{0H$Ak5{fyE@Q(`1{aV<)`re*A`=o|xdB70?q{ zXe4<^@|V2Hn;rh)!84(SWLm_h?p+H<3}!b%7SDY5DjE$5dk6be`&+wbeb%5t)kkJ9 z6=6Bxr%xXxWy3W_f6-`X7k6qNHw#Q0{n?_%JY;8{%3Fmv#~jimsp-7GIKum}DxEC5 zHAVQbzyIFrM>Aiw^3iC2Sm>+K4?pe3k$}+M-9!klCz#C5lfL3l$=Asw9>-q zaqC^0;1%VWt&zo1<(shqvZEHo+9e;Qf`I6dLQiV_`qRqYHtJ{O;mBn~HU8MAIBtHD zcgjR2L&PDaCL3htOqn2#jhP^#u}RN3Y96zr4wkgWbo}ViaF&NipB2MP3UtAxp?m6e z-7s**|8ymOLZ*lmiSb++&3fCf61;vwwV&zmdhFulYG|&AdhE{007SLW>pRKr=Fa$( zF716huliA?f3@r4Tvj_1Ey{Vx0KitHBQ$h18N4a^1~*G$?mx-e9S)T=F}?UcB&J$_ z^b7e;q_SBVOQY5%wlMT7@!@3_@piEDP-2tCaH@>{_1OHu3zq3ec?oQKGaw$IiYEY9 zzajWnrjNAD={`sp;~Ah(=b(4EI{WyG{bly(*u7bTH)dtpvV&9Hj%So+2M=C3x%1~K z<|4QD;_7;l>C?<3ZSscAchNQ-Vycn$*VSLv=ZY-?sZTs}#h3p!TG_ho-b^S^=2p(K zO(hD04RK}T?A~arbEv9c9Z!GM6@9Up+nn_f+EC1QWo~yk$(5<c3*s-A20$$oUMpRkX=2~yg?brZZP#*ua#Ltj ztyK~$FSoF%*LnV*Dn-<@(kp25MG;?pBu7nAd%VqPo znRvDQZI@sq+;RN;?uw+U$@uBy(yi4V9=d2H*!yX`%3+|Nw9$73J6byurm5LrK!plFvaRZ*+UnLFF&wi?p84E9L&-h@8Lu zXhRKh{1kiQ_^>3 zWv5F$j^EYyUHdL3md@k3IX+b%0$;)u^~#~T@tR6#?8S$hGDdsx)bf@4RX?V(+yY-I zRK4wac8>gB;~k1$z2Pe}*W*YOeC- zY}%z(V!>xN+Y+P!+|C1S&P+dlMm3^289B9eTRp`V(?onNq>yazByRh7yLbnLOT~jUa+cthWiHJ_PBM_s2Ekow#h#faPPs6(ZP;!H|kBoVTR9`T=z2Ji~p8m&ilgCU>2dZRlIGcuWC_E zekR1KPvv5ek!^fl0;BVSVXV|_MXlI;_D56Cn%_s_9|7A9|2v|}Uj94Rpo#ai?rA6I zr}*(p%=YZJi2+4zgOJ(S=u=HHbqTvD(#J~4w9lE*2`IHMreg;h)V~z-prtm8x8IW- z)Cdjq_Y3XkfEk8H#{E9LtF>VpWLjGwL-sg92^c}?p8PA_4z`U(gMZXA^j;(GH8({>q=WP@7=LKkv<=i?p=u)X 
[GIT binary patch payload: base85-encoded binary data, not human-readable; omitted]
z7n{D-_8b}+BLsW$fnR-;S5KA%4NMZEX?tNFU1%$(OOTfu-!sh=b5 z6vdTN^%H%?GBp`w*s<$-4h`&1PL{kvkDU^1E_nKY*c;1gIXhK?WhaBrLQMJLL&Oe9 zL+w{+QI5Xuf~JI&u#y$L(F%Ue_!A?D@vz~B7F!FI8Icy`4#G}P!=TD%rYz3q$i*i! zn?-j3c|qhs0x&FgFoAmGr;6sW2o{D{>Ix&;0Py&^#xMOH$Qy)~hAEiTZjW_pIyVVl zTD^piSB0R+a9q0qC>oF|hT-Iw{!{|=p&NiM6=V^j$25<|9T<3B=j1(M0z-2^+yVa( z#c(K}NoSmJf6cBkp;~{|y~3DopH8EE_Lm8cUF3E^mqPdkt7~vmN|J7eR}p1neWThi zR3b1LXc>+(-GZOsR_Eb^ippAF<#1wYV%4yuuyb3rVUT42j1*3?Vxoqo@Gkg%VHAzg z_?NrD79#cL=lSpf`j)`g-Sc)m)(hp6KxUfV8bi_s1wc>D28_H^*K zdu74St6R3Vhj*nme~#3J@^-Lp-7V!3lRF)eAs*yD*>>D{Mw^)u!u#jsY`1OQcIL+AddlQ>5gZ-m1Vo@BWgu(r&_Gp+p6VzArO=W=R&J9b+(O25SKzFfCO&e|UK zOGInY^GEfWq0JI`%9;>Y!H?AE>u&Z4-D6<9cNF%4E%sv1(->B9y9Cl$^J!g!~d#xJxX2M2P${CmID=SFn}3d-EE zRixnn%mk#s`1FtY{P>I{jjk5jAq9E7{K6I<`OzoM%#1j_7MKd ztuxO%Pp_e8S|&4rues%I=yL6t_SWukU<0RrLtLWktk?DIk3hHpe_94ngVCbN2dUVE zrQg;>4Wp{B(U;6=kwk4w5%eX^#JxgD$V}gSC!p&amQ@%ku(^zWH^8ybCui`t9r?8| zz@ysS0JfBAjOjt*Fu_HXU}!|BN5wxoEX6A?S2kl^JO5frkMar}{2oYBLSg4)28O!- z!n~&_g$P(zh2P%Jt_1JFG}r%ZSqT=OELHLwP zz=gbP`c%_e%W#V+HJ;^XU&sBy(Gdcf%fH43lC%Rev$N)}4b}cRmk12SA(evC+B`~> zmJFBI7UiV7_a=L@-v(URE9l<@C0530VPnBwo>sj z1a0J27O33jclX*JWkro0*{gc@y=z@S1R9^h{9{<_ zXXEFhOjZk4ZZ4}cEn{;0XmzCaU0oh6MomeDtT6L5dyiovy6!d&mcRN|q;X=0lguNZ ztJj2ULm5h6i*SUgQm@kzlKj|_p(sF4+>k7K3S&(pb{))aYk?gcw}4Dy{9cQ!KL*2> zLkJ>i@HA%D-u-950{%}kLwZ1ac{xz7y!4tU6cgHS3K%)6E%m(*5XA_PLYM;k{q>*2 z3x5BSo;FlDTe#Lw-txD;`dQ`rBan>e;WQulEy0@ZcG}6fQHD8CB>=kx1FiXJsX#ij zgN5I%;b^``AGh^V7uVY6m83Xms7c@P=HzRI7~O=uFGYL?#9>VG*nu1QBRqAADN(Zb zXt`K*P=iIF<>dEF`8AYa^zPV0!=>xp2O>X{N7{`soRXc=5Y6?AEGLtPwhyw*cxq** za)43EM?I3Bt1pGoX7?S@E8HjR?aA5w%`ETB8zpAsg|WZ6_?pK#*S z-+0sL3dhWm%v-#G|DB8G>;rK43Z=`#J^{E|%-v1MRRMCFM#Gj&AW9g{J4WYo{5%L5 z6R|VLlu|g@jk!Fw3mru^JO7l_2-Ol4gU<`?y3;UBPi`s_RDjoz1wXl~F`@%F;k<24 z?cZ|XxqzSK#u-pg-?~Avz2XWG-YRzTYX({t4$Nyo=m0#&kp3x=|pAD>1TquIu)Gv%lh*q z{$BIQlL|TB7N5S)$e*1y3n|1xyiZ+5N zV`aQz{>$+B5AUKhczkCjO9xYDS-Z8N(avn4s)aeU~w*Zn4|j^{?YBJ|NyF~_+o^egG< zNEpqVt(6~sO=@G1Ft)6DFKl^suxLBqu6M_2Dh37iHKZF_$ChiI4DRy$G2pN~of`*z zF<;B-keP{)mD*}_QRAQp?eb#0zh>oWUVU8;e{IU4>FtQ>-`_KF3ejX^uPvJo>}*xe zb4-^w&m>e_X30nF43z#B9jxN%EK_^mk{)2-YObCC^Dp{OyL-(&KP8Uq;S*b02@1M0;K|_6WjqKGc z1uaOeI>(|^bMJ~<#5_Q%0b5YoILsoMFV!mntT3fKtT?1a@ImpM2aY*l07KdAQCn@t zu*8f))B(o>q?SlrU@~ag8QLPQ7Dg~Q7a-X~aeyMzY_B5sfjBpn2QaB>{}B*>vuM*Q z!@ObODTIn2Cy#GoS_c^OC~XkH0XYT;1<S$|wfY+cxFnZB9mjS*AL|7MKFvuT@L<0T{qbWXr7^<#tg?dV&IjgoVSp?Tg6yA zes4u5m+fNQP7shJx&6eMGDd7XhLDDJm>W`q9Q;)`vI~V1b0sE?$J2rL5a~|OPx2^p zP!s!DXp8PIoEN@J*;_5Nw6Cd$OtIj2yHecS{F0ODZlGC+^h!94sr!e##f@Fg?XGP0 zl=oL+W56T9YRkj42~~BVDd#2R3y%>0A-m`~B*sqg@||)NN^4Ba%H8L~#q0XnE@uIO zgVn(F@*n%Ll)j7raPs0ZW=(V1k*MVIpzcM8S3f*Y(DW-l&FzD8x>NQ}UO|!7o734D z4`blP`OM5Omf1>frzhvFUp>|1bBN)uTCY#ACh`5}qms;LpAt)>9BdYbw1P&YBT#%C z_V>1!#P$t;&B;A#^u|&OlzEe$FZ1h(nfr)OZF6?Itwu1$_Ib(N5+n2~#^se$* zE$8#z0t<6~V;86pfmZg}PxwWW$e#K)3&%c69YjjyE52t1U;Mc~f$DNjUUE>r7$76w z?qW%Mh`DzDYFs}QkN7{w@ISA#kA%T{Ef1OL12jz9kG*2Pz8aEap>tiU?*U^2T1BG* z`@CA5RC>p#%JSNQ_rh$U|6E9Tfz#tFt-a*OvY<`frq8odS-xzqTMmSTQ#ZR6 zwXR?(`X8;3+sz2s!s|))$_GmP+gqCK^fxwksc}UiA7xF$y(rAch%BY60bS^anJs^nIxYhot=0dd3MLK5y7@0{f zQk(Ob&+{S7d;_~4vq>o-UT$`O>(|5U!@(mY6$wl`qn!cm`bQzyBBV~N$_K+@Mdr$( zv7* zMuc6yQ9m@5<+0sJAL8Ol9{Lyw>mKq|^LLWhR}8Vk^X5=nus!00mHx1?!YHvU^x zv=#{pLJ?ucM#HT?I^>Sby6_g@LwFz%09*ke1zIfk@be@*8Nek%bOCFL(r$vdc2~G9EZ|OQ4%2nVaXA~+0bn$^%UU?$S~|O;@AB+{ZJ}$6=VmT9l*Rv zW`wB4U{Q$*W5|O{VMrmef++*=MKw3;RNVU z>0Z98bRkE48+-B%&(N{Sb&df_P)u7Ikxm=U!bJ?F|2w{-_7?cn<4OMNU?rq_xk%!} z&N0=iY!IzpK7M9u1Fp^aa671?adHksO&>uf~L*}P;1yJeFA zpx)?4t#KkJ!}Ky}=Ado!CZ|4HJ(6RA(|Z3VH2DK|=9lf1b$J|!5z(cAzp;sks+%%K 
zjTq#RRr|U%Qij~PWacQ=-|b;q4j++nVl315MqU{9nf)ktTlV-e9gl@^h(+z~XJZj3 ztI!w%&cQ101bzR_s$neiZ@j0IieCnW;g`z`6cGVp4JNi@5*ohFc;Y6zxvXtsGYQ-V z-KeMQgSLz2LV_gnEjmfphTY;%xz~?8ewW>SF8F zvDFgS?*kNcxoDB=pAo*8Pk2jO>qk#;+x&11^YhyrxSwnXEK|$}h0Z%sHxTZ9Wj2+U zQ>w}NIc93QBC5i^t5t1uWp@XOjomlO<-Ptms7$Zqyx0)mak@!em_KXzF|t0dxXbvvhsjlYi+rj2+9wD|~wf0o^tV1uOu~pd>=31d} zsY2KB6*t@Dy82$-v^0j8woSk7R{S9SmSgTt(j>FR`dDY=`Jru#KW3Ue7w86rZ07b)mvYa&i&rvY1ylgw4_4D6*TYk44&XV4`l&_7&rEuy8PgfVv5yk|qyYP>Y< zE%Dl$A6tI6BdV5=h=J#`NKv)eT_rUkCyn*myhWVxI}BixFd;qbG*R_O9Wfi-AID7( z?d!=@oB&(aV(|;aj$vsxpK`LyQ6G)qf2%XklqB}LE-)K{BxvFcamofJdB>-; zujG8o$B2?phoMC)a`ATRukMFx{GF8-Q@1~q07p^%O=2Zwi)SW${sN6D=Qlo37lCx2 z?<4IZH8{5#NQk#~$StGT`V1Qy!z$U2ntzkU;eM>t&!2r!xA8+F0^S?BYm3E;`tm8N z!!I8GR#|#?$%F-7yvanIf?o@*X)`8b=}Eumud%;)CJQV`Yhe_rUdO?$KYlf3VN7vkC3yI_C>zd44ZuIh=mI#U?Z#QyYE+Fi==*iXB`5wyGk;|wz%hwXK~V;6emz(J>laX(@%(f#Xefdzxy@YPKS zr{49A#plJbRey&D^~d~kNqvuw>$UZ-$GI$BejYC5CF~yed5(j^;UJwicPSM(Poruc zNB=6KO7lv`w9)kw*FtXmXT6S7!5Y8H$0HxO^4F$aeV^@R0!o^g?>b$K?+iNb-nEug zsy3G4A$8PdJcNyNAGD4%`^u;OqhG*>IA7u^DW51V#}0_nc#LNP-y7J7j}1m0T;<+X z_C@%+4k3Qnb+H!l%VRm4-9R+dal91Mx{TlkDZMv<_4zxMT)6+4on8tEddO;0q%jI6L%Iyn^U{map1e3UDkM@Q?0>#l2K543`ApGjEH=JWHk zf4KJCEe?Lk>%LmSZT>T-ri2fkv&gpw*0u=qxh-9suw%g^uw;7-~Gj66DP}c zZE8Ou_XsOhY8r;cFAw5)KUQW5;zu_Xi+?#!(O1-i#m}}a1=Ee zZ!d&p=&cC(ed4@LaFd-SLVJsy8ju|t>ugMDJ|J;t;6X%}AXs_lG5_GI-T0mVS$I4= zF%{?1=OjZ&U>%jI(=|6F;_uWk)BT|=>p&fG%Q_N}xZ4ij7B_r5z;f!W+Y_82RC*mI z;l}rpZIZdywAY>y`2~Y&>E}1&8zN28qu(Lf%T^ra4;PAl6v%E~gJ>17RIz`0M23&C zRkDX~P1R&Whj}Xc9*&BNuooyjLVftW<7Irjb^_g6|30o9oX+;=!h;l^&NvG@Fkg!e z*{xRzr+z)}KBuow@_q^4D*fpl<@SEw8LWjc26juv=SNI?{b++FuKwm?@F&KU*bYbY z>wKI$kOpizqXs2kvMl}wB}x+z2#w3A5#GuqMhsRLuTuQC*AEcAdHX_=ESEdkpxrOU zLm2i^aps0f;}S~fE&2`ZcB6zXdsKZ)m9SkpGC$U}$_h(9`5W|U(RDZ`y?UO( z@yG0%t{rXUS99k@JV1v&?&;>u&iMeB=vy_lcd*n#iu@VVqZC)jA-mPr!E;iqFQ>94 z`s7S&a*Zv=2kapJLGOhTlXYUQ!1bcEMT2Y;27Pa1oZynTT z+iedAZ_!fR+d|ReZp8`|cefUIhu|*7-5rX%ySuwfaEIU)WCS^UY-P$DQ2C zOtNL|wb$O)weaTcs>(nECvKho3GifH=(d~xcPXjP4S~<$PV+nwLpv_u9h%FHir6-6 z;(Yo#9dSgm$A>RpC`|W7&mUlyLPTM`B0%bf7gmpG1bzo1w zpVYC(I+!{x$Z;tqPXReEI03dON3(c!GYR*D!j>kS3+a>J6!$nk@O-)7`6HR<}&fC$}`K zx0#^mVNL$Kr)t;5D2cK`Q|7t%LnD%mI#h^Bqx*tmjAcW@M}_r7Itz)vzcfHF@qhqq zIJ+bW&aL8I0#Wb5K2tOghD>S%MeTQJroK1>(5r! 
zn@ZmNbG2?CzfU_Y2i+%`MjY?}!Dx^3?S)x9DFH14|DT&R-0>b+QQU|ei35%^_52vK zuUKy>AK%R*VG6z_e~a09eZ-Acx{{5b$r1R11hdRoG#45GtNv~bO0+COJ!=bc{h|fJ zIRtP)N4YE$lLVFjM++dMmL*T{&AbKh-zql1+;0Mv=ev8f$ts9aChNu>2@A*R+Dq!q z=m0eXNY`vBJ|5p-Mk0d>{SuPvhvhu~9bh~XCg5Hp&#Lc{;&ujwXj znAF}RedL_n3LK3_B;F0oNN(JeuKNJciPjT@%T~8^R^USn?#%|I;=VJCr%gu`;Q*&C z3I%`s>RRr!ewrql_mqQbPNWF+wMP4*=WdwIJL zV@qRO6%kdXwxvE<**b%V5Y=bD6M8iE!LL6Bso^>+%K=hCH%BdL5TxR(0FBi$^kCTR zt-7=Rw2z$tCdV5`Y6=e|@_$+h|A68lc|YOp-I#yO`=1LBTfbpvj7&0P$|DF!Am6fY zG#C(N-vYQl->vjji2I??CBo$F51aCEn)VinLBBVt{I&L2z4Oz>Zu0?bG}iSy0d8nV z&f)Td%R@Qz&k3f(MD9YpfHhulq<_cWb8qvK>BHIuCkwl3(em`aOo9;l^WUWN6G~m) zX^YDZ8UJx^#Py(ijyo<)Msb+&Yyh;v@kR~X86FAmU%uq7S(K*SDGmOlmLP4`2yp;a z*wN2L6FKYO)mh20QYiG(NEGWQv(WH+@*_eMcTh(M1GM5{WtVYVp(xzQjzCIu;?(~W z*R~*|VauEv1xBG~;J~H-h-_3Mp`dtA6@MxN@cH?zpc6kZk#V_ztNsLkicQ8}1NZ=% z1xG$oQCCubMg&X+(!`K`KS(eQe=rO@M~8|?w}Qn@Pk(LAwX1p>urvHv9&T2MBCiAu zmmD`Uww^>1c@ye6lep37o->+2%XDKm%eszw%c}(-$lwjl_k!$;T)$Z{P-qicaj*3d5&BP3MW(1Jrse^D?0W z!$OK+*H*CZEk%ezy3;D&>cos^u#xQX~<#!@Q;1YDNnt{63~epsuxG)4vd>d@vi z@4gIY6r!6vBQQb))V-^KwCTXeJC2ku-&ud&wucJGn=V|wT<_u^)P{TmAf#7Z%aWZE zvN+5U%E3hcuD_b8=)Mf*9H>uO%4;r`l>ISuX2pvxFhtF`EF8HWo;P@wy9VMUHY|M{ zwmAqo$WW7OWF5lTXm=j={rMU2hq@Nm08a+IUR{wxnSg_7Z&JA$qZKlvX8gG^aHlLG zua%Acm+{~uF;72)2Su!wIgT=d*1DL^U3Klj!{}_+(IC%l8dnGrBeYKZy(+amy zp;ahv75m|=jo{0Pj(UBU_vpwG6*rrY-z4UbmuK=^5+|>XP^yw_7(>xoDeMl${|)Up;Lc{dwnlf zn?6HRGYd)O3DI2b<1n|RiSV#VOp&OMqUo;=+sl7v2tORvpp<%%JK*{S0P>v4QRoo` zQ50imMr_}p#+%V>bN_=5&9Hstt?-2OZA||$nTz?OM_IbcbdnO;_QxUf5iD$ml;aIG`2*o{2gV3B z-i@i!RdzL_d85|JTnSUeo>;7tcFs^A%bQ?6E`qkZGm@xtW<`-!0b%m#?-gz~lfKEuXtMYe|?4 zw<(n(M-OY6d+f3Bl!ETA@5juuF0Y<8=q_PzHTTCY&ooo3GKboH&4Fo`klFUueLh)E zyH|s|Hg+i#69%rtq5ybJx~HDVPFS{PF?5DecP|o0^T96y^tR=NmSjC0EiWKuyg7N^wb|>;yT`a%vd3{>+5;K`H1Amqhc|q!$o! zF|J;cvop3#X}RfOH=;3qa0cS2m#A5cP{PCN3sRNME3{$lnq%htj&%h1mlX+kV_;L( zq9@(@#Q=^F{ouKO7!ho!Ag#fc4*YgrLdD1i@&>QeGMcx98;RsF?ORc-noS*A$Fap^ zV7s}43ebSDQf&PGvJoChgc`G=30;kL$vk;61?e(#xph8LQo3J5X%meTvROTbiZXew z0umk>t;zW0f9e?Xzv5A(tw8WbwZUMwKN55V z4c<>1|5l8I>(RYCj`!YEFVK5m0g;0ok=+omoqlnD`32ypD@%%o7P?#q0i;#odGM3- z%y8b!uLslk2jN$*N^7t)8?QIxy?^{BL+)@Qt0SE&mwElR4VP-h;nKZ!&&#hrZ&mpE zyC3neXUfDkKYd6+lY@2bPP$>?Z{HCkqh1(W-N~%l$1f8h9#0RRLm7~);0O+*#*``- z>7HTRUf+uKdi?BbI8wgh>~0RCs@F~PLY{BfVj$kPpHpkqHV(tHxqmm2U@rHlE#@@k zR%;)l_P3+qu8m3F(?9Se=T%k?0{}q<9i&LjdQzpLc8qY}!@i9tYesrA5Tv@YF*T|l z6nifXed7!10k4N8n@$JljJkGjSJC73AWOTA;oJ1X_7OyZpAO*OdErL0X17w-ZoR28 z$UGcK;zt?Gjd}Sv^-LHYe8;7_K3}n`Od8H1;>`q2&1P>*)q8j1cO1%wa;<_Z^qdz- zR|wbNw7-8bTdj}AhEZGnTP<@iktzhKXv2hYw6-P%FcB`@A6@KvC90BG_v~~%q^k2n z8;b!YqVcgqmh1&EoS(__p!>Lp0CD{YCF;&)1g>5nj(A9(X5!=k$#=k~0#shvhtfe2 zGU#UN=Pyi$@ehJoE`s%YYurOmyWR&zUhiXs{C%;A>jc)*uLT-6JphGzyTl+~r!HGa zSXw*$-dElKqg*xH_1AO1(!XfKuzFY&i0Xk4?2;YzJtmKS~0u+Fe?ws6FO~b6n*41J^r#vR`|)~yX)3){u4=z$aAqr!NNT-k!$gq+xP5fFJJ*DR16;9<^G`KdP>Z ztIySBc}_VvU;2B1aLx~&!x)eJh}lS5FAuqn*aeQCH%_uTRr8pn5yw;JB;#Owau%(h zN1XNievo0jP2Rtp_>l90@79)9hx+Vtv|@hxPk1|c+}HnLUTTQLz2h)a05qXJjjt|& zM`r^!q==!Dz!|4Hu*SU=FRQ7hEYo&ABJDdlU?Zb$>Fb!*`$@3=7!wS05 z`z^H8YV*P-F<%1n?~-7Iex-V~9~#O6#Q>pHu4_aAzLtLY`ciB%V>LxJWvaesIc9{y z=Yb09JH(>wnNbGM-vrIux)57&^)X+gu=Zod zLxp@zFr(Mmi|E8A)yv~$s?H+I=j)9l)~JorGjsjfWB>ZM7XbHT*WbG@Fq$6%lGF># zZbM%{MVix!NpNO8aYwO+fl7fD>!(VU0z6J%^PBd~aNu*^v3y z9FVgD#Ber+a|&)a60X&uR}*mU0~(5Yu=<&)Axu+-2SNii;&1d|4Zqdgt|ro z63a<_xmAMOxJ{BZtfho13BQ--dbx!sP|Aay%|cZKbIOgz&`MQh zdNi*?<1mqbafUv=0D$u`Dgz_7jj+K{B^?zos+SG^1Ir}?-r{KR;W*13IDM9Y^ zG=7`pWuQ1Nto{q3&v>}?wTKxeNd5cAB3RYyVq@{(W7fDs%wO1omRT|42mKo=0n!%Y z#OTzR7-B3(H%M;vj+%}vLNvB$N)#jO(py*mpZMJi2k%#hx8n3)fq7alp}5>yN#AG} 
zfDRt}a*`{-t=~yFfuqZiOS2TS%g=At$yXD};OCDwaX(@i$_aTrj!W7PgmdT2QkCa- z(gxqu`J9&uA!J7N$<7Ar-W#uu5#WAz=Hb+_F-NC|Tk?=(f)HX)m&>;G-zhac z9kX?e;tL$)td^jg?!1FC0o%_MiW#ZJ<3A(ZU0y&sC-=IOsn0xTJ)v^A=g9A*evXbw zN1(JjU&Ri2;lgj_CvI$=A(j)asyWn-KbqK-(2dN+H<~TZ)`NBnbSwJjwY7JeiC&+l zrDA>ADw|OjN!Q!JBd-RHKrnyJ^DJ}yw;ENY@U`0eH?d{7PX@a80uUfsOATZyA%d)) zPV&S|*8$I(t7We%LEx>U&FRF~l?Hgt zPd*w|No^4M0|9Q#3#oh#nmc|%kd7!)arTW#)zACY>Un{dfg({}*^0NVq!cAe*F^^x z^NhNOKU9;4f5O58ktZVaNr2t@2EAErf5K$r(&?6n?r~+(uDvBSDL5?ZGjQ;NcWhqkHz{iH$S>sgwuA+O6WFLETXd$V#vjnreii7h z=f>k^{AqODLGbcD--H*0FAH?-C%kfaKt>cXir!+j9OEaPsV+a}?8TxazhVs<}IUF9Z3IXkhA7fi(qo41Kh4hc0$tf0>*W=x0 zJY-@zxIqVBC#gSM&S4)FiuEetw-S_~yB!EvjoGZ%md3`uefsq4?sORw6H}+vS*KdR zA5YJN+2g@hO>NQPV4_^F)9dNxsNP}`{oT7{7Nbzf{E=Gy31i^-n(Jnl@9wVQ=IL@R z)Ylhm*eEIY*MT53WMoN6NkT5$%_Kc94XFDo70Mn-`uq8{I3C3&C%@z3;c-4+Yu0YI zr%gcqDJ`vCWBC4;k+lq9^}(W?r^QR{=zJqa zS*Gi+df$@RW}}t-titx&+AUerUN3^_vg~`GVvNYLbPtLsx#m=F6DL!?(IV4%$am8Y z@KN9}OsTljFfa0W3PJ`EBa=Qhmd&{~lX2l|>g)<_@IatdJWiXF&u~C@rTKYgV(-!Vd7weV#)j289vWFkI0)#1Q|GhR zB68$62=qomwzc@7hmo2h)9+Mg1|MU+BYokq>iW!R7kF!b!m-_H`h2&D2t!hjXw z8lJDk^At1IFLNeWcS!xS`p4&TLC=ny%WEyplbiz2mfar*C%?bG_K3b|CFW_(vvUJ= zP$+m?IjcLjfy{gJDbJ!GYWl&1yxVPRg-~%{s!QEn=2V9XD5%z>gFhqneg#=ts_RtK4DaJs(J7;T0rjc<0qjrSU+idmnRV-6PPjg8u27$v5JuIKf_4pz=Q`&GJT*n zeZ$3%*&Tvu0@7kC~|QS1r16A4CtIV#LTRfadzP^HG6FIBt0 zxTtDq*noP0(wT0DhsM3gY=Mj~^#0?dg2KYqmX?Jgg*>y_g270VSt9{BRaI3iD%rZ) zT8qVs_1lw0e}8|>e}7$Sa*BLmGvt~dihkg$uImtE*~tCGL}Y~6nTZ0>Nx~R_iB3&1 zr0YllvxMzEi(kX|I`2?ruo`{;<0q@>MvLtJ0CknrADS}}R-kd_$_~4ksYrqK*TG5f zexZ$%`NZutAY#2V5K&tadwEj$h;=gKTj^YY*q~pnlxPSSfncd^@v)(-$B%H!Qv<%g z@|`lkX&MqNpQEKFB$nG0JyEO%ZMQC4N8{7rdu9`?`tv=7)6SfApz4 z`Lsop2IsQl^fEH|+!k{!8_Us`hp8rc+tb=#(9dj&$gLMYdTV_fUm_!JN(7u`!|c?G zdJbx|7k7I%Hu|g$2^uWv`h$8J7<&!FCG2pBO0wzn6LIoXu@|yv`%hr4aedXdWyB|? z>m97!YWX_&kF6D9SE~74gi198cliCDD--H3PvY+ zg4*L3a&jl7?Jvx70nDwJtoo}XD*BiujK{tME2P(D!=ex-y%9$%k2sj!YqY9J6O zUoOH(ZF>>g$su1mJ|-0oP|b)|z49CmegdgdeWQyT$)YpA1r2}&$6ADEPK`ph$!qWrO)44- zbo04vtf)w(Z^CZb^vTS0FXhg8L>LVqNe;BY!+x};$|>QjhyZ37x3oWA5qo$9$pUXl z=R2O`_={9{R7!3PNkZlz%+oa?7EX(8t*OQf)>CWs*ZVRyM&!m3xc_))4yuIN25=#v zA4^w`@hlR%uqnvFo6Fg~qfx^Q_!pZ(<(TtnHBSqDeZ%{-0j!pHnnjpWQ3@pEWzLbv zVTG%Y-Kk=P>$ywESao>as<~g&J!-5gmT{kGX>#ew#=B^Y6Amlod|axy{_Q>ye8%0} zd`9QAf-HPF;+b-Sq4o()ag%HbhQ)D=;q2X;AgrMvF)?h?X>P=)UeqRZvus3xcEw&N zU;a-b?p!b-(J<55?!xAH7yh;AFIMy zI$yOVQ2ay=k+^rg@_qMnD1${k-i{OzCfg5Hz8eQWvsIL+31PUZNqvXD`8yG*1e`d! 
zJkoKZwBP!>cfpg_w5h1>H@Xu6*tTO{#SqVU)&oDn*w1it%f0U`{R2M$EVf9SgyOy= zg}Uh7xghzW)(WEW+h+;o4-x49mbn|{|A1WI-8@8IOy0e^skFAv;19W{sf4S_Z-Qgg z9XJ(&!z5Tj`6`|P$tU|OIB~-hbYHpk2Q@3l==!;TQaatI!DU1$8YsoWOtpkLSxl6g-By#zs)RMN*j5!YvOAOO22} zA}J{;m27%4({JHI*$hed=l|r%nxCf1Z=?$OnTU)YvYIiUaBC)w1k*?gPU$PJlN zLnDUZ)8)-9`SQurh`0@}!utzuI=t|Ty!T6uw@m|otV+JQ%dXpBrE%9Hc5>&go`AjE zT&!(c18Tj*Z%+y=)({zW?$0-Y5brAEqZ9TxxP@2F3kl>(ic;B3uJRI=Up)tL9#7#H z=kqjl-1sO5h?(@OE{$gEZJ`KDEgWWN>opO;*%pQ;r%KQ}$x;^EwE*VKOwU!|>8|ER z5!MF96yvAi+`Yam$_Q8QDM%?tL`hQegS075@PzhN|E4cED!I{FTf}PWoSjNBl*ctr z=5)ge;#W^!@J*A;VzUbvJh}92#u@ylm+n4@&i4wV;l++^VEF*@HIC|*l+E>SySY4} zSnjE;9RewdXdJoE>M&mY%M6B~x_S3em3C}+*uMI-G~Zp(be7s%WtmAK-V;r)o5=IG zYQx2rI2JEY)T*@Z&rA2C5kRrKu*P5ZO2yVdlTuasQe%*8e!Xq^;!5ov>szV{qu$>{ zT_<#5jY$3rP>Z$Gy!+E8n+F`TuVotd{tQx|SGx>f^&_sum|;C`o%rQWv6UfWk{J&R{7 z)aj2#wYR!y@rmp7QU#v9tg{}-c$?(u?V>z1=zP#MZti5&MN8Pk<_nUYlCCVGwDb+N zMefPwarDoa60?CGNeLB+G9IGpb<5|Sz!G(1I}5R}O-_6>gFo9sCPqSN>ZPVevVpmv zPZqJb@GG|^^HJYZLlV$#*2|BUMY;J8~zWsTd6E#JD+Mhi2v#99rc+k&&vj2`%Fsrc=Mp zR_Z^0{%kN9DdVwE2TYK3dbs%AH9l@N6#ZEyjROq@WtOqs6XFaF0<_9S@+&)3Z61oA zRP1;dNdG`*$)LTLi~~Hf#%f7VZC7Oxfbid8e(yk0k&mB#Ie4Gwk8 z5)qwb7YAyii%(RCm2n==X$i%ZoviwbMuD^seI+<=jZD=HdjF#ZVAdXSVxWFazpp>Z zeY?qg@Zyx7iDckEBx8_DDX4Fe*5J%7I3M|J!eYNN6XP?yylmsu@|C95`gy@splOU? z&f%(#bNnm!@uUQanjQmJ{UV3+DyJT273rJU{TA9b_yMf`WRuj-8$jN7n zPOLZo&^Fb+@pyr2GJgG@xR>TDsmst-Wi*# z6nkcWfbXtio82!zUZKd)sx?39<^u_@lB7KE3)lzkyV?v(q4Rpb9;!UzJbGm4)>kYJ zQFg0&FmBX*ds?_L8_!q|cB*Kgx){??SfCI+%ed2C~MbkP)7cp*+FrXt)LX1j~$Qnqy)Lpr(LZAv|&ixZzjt zOEbZ|ohEhqeoQnh6f^3vS)PXBh(;AesPaS1h6E_&59NQ^NaEM!W%5_Jz*vLBI0R^A zi`ex+*7Un@+PW}&Qsr@`(l+m!#aZC2d!2p;5l#{A`@J8dkNnr+ZCK*{vHCHO!fxi& zaqwd<-E$~fR&W7mWs8tNeU~@F$e{}YO^!*u@FkSThy>)qOJze|x%@+OVqq0*6oqpOR|eY{_ZMfs8{ z9%DAuz^>F#PIE)4U|vC@^ZmSND}kj~&P(8M_nx~l>pPzG?v#T-o9B73HXSwlVndr& zYuDg(N&y0~=G>e7mV4$FComDTMpcP)rgb|l1freIP?1^A*sPS#!q@FxY0);4eW91rBzhq< z0m19juSs1*Z8x#njE7cSgtXWN(D6=P?y{%%Vlsm7F|tzTxdeh%%i*e*mnXYUPm3X; z6)=Wf+owr`pFga0^DbY1)9jg)hoS6N@;;;;;TrTst0~e^gv*U4`VEN_a;g(oWi7J6WD)25bjd_K9JG=?f3+i z@ZktSO!K3GN+S6Rf?5~BO83v7J~VW2_G7=IBrSs4$Q2L{>wiXV$^3CyNb#?WCLJ-vES_^HMS*~KfcjXeM(|A(;cL7`Hr(66Ue9Gf2`u$DzsRpQ}UrD12@y3BuV-Zo0y zWj!72pJ$ZSaL706^x`*Htw~TKTVLjVF?lH51(!5ERTb@*< z?VVdC(^_^QHz!Ka&q|N)OO9?mSa@Ard>d|WG?rOKeaLQi$D-1Ti(j(v_4aOEd<*3N z+FLP8gi?TRt}_cwqedW=(#02o3WOo$rg|dQ9Pv`)`cfX1s`wo;#Pp?>=Onl!2U4fp zgml92bcmN+l({U2qNMTANt0^i2H@EqUW78O7LTGpDV8G@Dy5d2r;X3{1a`k0Rha?|*kHacQ`Rkpw<3h0Z zZS>h7i2c#v)tg?|yU+p-FKoB<;4Qh;aaBXcx%kaH)wq5deDLxDke=}7wp?Mf`(!Zf zcKOInYAl$EVIxs8W3wGE=f+WD9^-QVys)RX`f@=wiUV$c^E9(g#`5Er22Ccy%cz?2ERodkz8}+GuwDH-*zz z*+D06QeOZ?DKDRHIX?Nr9^=+b1`R$^KclU0;q&>qSD|#!NB%*jgI4$W5}uDF-1!zC z#{67d^lY{YH{q4G(oVmAX2SOJ?~P9J6ch7Dl6H2svbp=4<-3K82-ON0}6 zqg`(NJsL5$J%vf2kB?7!7L-FpQyZ%`pD$4<&CC>+?v$Kx&V)krt=?b^fmXgr!T*%y zBiUC%TCiRb6`G*&g$d!AWHlykchAXXeL3Z-PL_KTGDoEWbIx0EqeSK5Som+aYie3N zhr=kE;7sXL&7tBHnfoY`>bGGjni*jKU%lJMCR(UH*T4^Tpg-!pM zDt9O8EJ{(bb~rWiPe<{JZ_!4^_q|9zM9}Z^!7T%8NHE{V!^y!G#H>;B#(| zo4Qbk z@#OlI$g6WArG24hVu}E3fS%NI*!}EGy6f&RcTyx>TAHZ+yMssjwI1WTH^x+--lC^& z;?MT_-`i{J`)1<<^!QgdST?iYR%9ORr?n>TtMgJa_@ADWo89bcD5uL|dOs~`~c3}!NcId_> zz*uo!oiP?H{QT~|KQyw$7ED+UxzA`4elqV1kw=s8K70MXq^-Je)I3x>(&^3#ju-;a z$ao5{pDZ<;PgQ%aFO?3;WwvEe^Uj&Rh*jQuF=?^yUX=eB9*PtG0B>;kRZzhFBaQvr z^ScH9+6k5RtZR3$fwYeOf!4ti0^BC@^ZTvrz{aZ)BCif-sq>c4d$%kmIfVN?cucsj zupr^c1mFAQ4JH^jVSZDjv+OS(s`|e>eEy*)n5On_F zbmFbj)$7Vz%1!|K?WhF8N>YhFnl~^7UZ;UPu3s2iG{WT&2IMl>=UQI|ZeIt+ziBZ% z?*hS)K=bH)scDOXNRQdt_v)r~Z|UQtx9P4ePM}pE zS(f*mQ*`3kI)pswQTCj-;U%%w%REqKq&jQx{5^);YrJI`lzR#t3M!=)fM3i(4+Ww@FgXOO(i5z0V>9j;(;|6nIQ=GFUQ 
zWe~=_*K@BgQdbJNwBC%hT_yO*!bT5acPK20T%xY-Fg=9w?)Z_FD$88cY4DA&PXoiJ z5W1VNwVky}?D)|L>~%zh{_L!=a`nQr)7g~xvzcj+XdN}>KB+u4Ymvv5^sc5@q>%YM zvgF!g;JgW&uKP#{1Eld_HmK|Qs@cwe64&-OoV)rE_{M5{;o{IHG7f6Jb-TXEpQr6I z8Us|)Vt5WdSAmgz+V#lEaQ6|4-Pabz=YemOyP-xgIfSTP-{_NGBs25Lo z-M@y|qNSvXEU|WI4GuI|OcV+39KJ3*tfY2wda^$TA#cvIs z>r}lfBdl;TKQ8LYG3ChMR=uCMTl?b6YMwG{)$R5H2?ntktdLl1O3$;OhEuktL)BBE~jY$eYJzw(+{xRC!#4H;qRe1m_ zK2xP7Xg1cfguLL3Anglsz)Ru3UX{F`A@mWKhx;BN_nQtnX`hneuJ#Y486~yhY7&-e zcMt+ThSEH`pwvl$r(w`OliuODQe+t+nK4qNRVm-upkv7Q&ctHW{vA9v3i)|om|_3_ zDld=M?;v%YJLEFG#=0l%zCR=TP)6dvE}vr>RMyFp+p?^Ll7I6DyEYV8j-GPP17Hgw++7;NB)*rPAV7n|?%n?)716dt%{;c~Q1JtLjc4+3!N2So=6< z@8u-knGt#Xb!4qN;~*6LfB+3FTY=0D6&f1Iomer3F6_%Sv3U_+==r`VjJ;_S`(Zp| zr9$FDzb(&M9QTPpdnC12-^b5&TWkm2<;gkKX5CxX^h1(Y3-h{BNkMl$;h-!&{9`NbkyA+2Xge zX9X0&gp8MPf&xkI2V;uwk&z6V+t`AVij;Si^A@Ir_B3>Gsiz)RGW~AnXxLkw8EBH6 zDv5AR3hT{w;`TP`d9lO2uu95K-&lIANd?N3*Ict{ZnXNQ;*k970F?HJcixO%Q#IA@ zBv_p&HNug|7?Y%~N)9N(HbF&Zr|J^1A~Dgk^E5{_ItD<_(llU=T*T>yal?(Z1 zmgZiwR^G|LN2r_kZgp(7HrP|F+1%PXG~OGwa=@76jusRq6lbw$eo43rHY&#kf{*vg zsudBD)*}zk_jG$>Ma`>2QK4;B#jo5I^T0nh_O7?FIq+g4dV17g$}+U23P>F z%`m`$hTG;lzd|M9NQgKaR5NbdQ?(gR@S|aE0{=D?Ew|A^$WhiT2mJfWlzxQFNT+yR zZ|#gdq2r8a5gYn_!bWO`Kg60s7c-#}ya#Z$<1h-_JP?ST&=Vtxe`?AW3st-?1}c+v zS$x`)4V`~MFS(M|={?g8W?+KDg{vYS!C`^n>2$6Xp-+YpfjN99^OTRMi8{AmpDb4S+f{;&k_ zL1FWET*_*N>nFAGy(#2cN1PCwytQzl5b#NGW90OM)C zo>#N5eCZA4-(cM2l{e`mhLsV7L-O^z`){A|?)jg0c<>QbXxU#9S~pT-CX7Sd3IEkU z3ddNvHcltvEv0+TwP>_2@B^LB3SM70_}k^b+z!Mxz$LbIG5y!^xAy+BJ2;o?m!r4F z$jfhJH2c>^q~B0WTFp5l_){nvkWvU$v>7g!wY6tRJN}TCr7ac1A;}C8f4r(mzx{cG zs^SqZaVN$mF+HrllGqrxcVUmNQnqW4Hu&7WS=`WbM$aVc*hCAR6AXz`mWWL&7rDc3 zi&#VQTFX(1qism!Ue;tb#?{o}gynoh;sh@MXUnWHND&8Yd?B6i1FqxF!fhH=!x=AbkyMm@@%G=o;*;%B9f!sQ zeI45`HQ|9$DtI(6hwDsxwfQMH>1E@8P%y@q|7h6n=D`=2i-^{y)0>oh#G1|Ay*2V_ zcKsc^ccd%9p+Zs?f`PVZ7*VB&^+b&>;?RSqn;G1`(B05R@E+5So8Wio-=;&Z;u!z( zuM{V^#aPM&8YD`l^LCf9HNt^cltt3%8MNkmnlA@g=UJus;f}oSDNXu^4#L{@6AFbA zaZJO6m`Hf;Se7eZwRKkKzS8y8?38H2C+fW?1~!wC6$!M-yK`vFoRcBy9|H7qfEhXf z8xf3);CF}c#VxHNfMkSorjMzDACiSVi_kK~D4KEetv@|<;WVM5Pr;D-qtwle67eFw z`F8(;qx}e17^vH)^B`U%P7=x5WbcTc)n{r*Di|M`ORS#`Ji z7@c*KmJdL{*M%Cnt9xM61Xpn(w}p{-Ky;9Mv-}VPm|%AhLG~1mJ9a z0BHS&D}&=~LWcV#c=trG3D<@D09zpB{u2N!f~ES!3drhCn}U%Qp>Tw!w;bE}sUYd( z@rw(Pg~tLT)u(lQ>4o}|LzEsEJcXLR`5~Py(j{#B+nOJ|t?$YQ1cxvK3X>kocL?mt zK|ImOrMl}cX;_$5yjj*sBd_dT7yq#LzU}h%mmMgxo9GliBy8{wOK>p%n=4t|UkR5` zppwOlOUou}volcc59QEBy8nU4|BI6YwtuPQ2l;dGs3(}28zU#DPy_N)^3B9j#q!2A zWK@^9c+}(E1jP7s^Ic5Yj^qxmMyMl7%&&IEhbh!TuraG#HxQb3kGdy^`-EacpwBZ;dBNPKK=1iY9yQM`NrGb`aEw# z8O61Fs@loI#*%=Vz35FYrE?%T8Iv0c!nir{+YxPal6UL9W+$krH}vZN_;0a zBfvQ}wa&3EA=&4t3^VkD5u5A7-mnXPg-kCQk#+O>Ac9AZB**w}a$*y$>norHc8c1)z$zpC{<}jre2|d4 zyfEKG50aZjq;F+5bFv6mnV(U=GBUg{b(3H1pZl-jKw^*gXgLUdy`=4YZC?hou!ZC? 
zzI}Qx+vCBdKuk^mpwD5Z{!CEOW&Ti|yiK15X~ba*vVG|EXwjjCECPBBRDS(Hyb~Vo zKfxf$`K~UE_@he*KS_tDG5f`W>r1bR=`x=;+Uft(4PsaRufpX2XPTk@#l_8*#*dnD zR3Me1P36XV2m7IfQVhjfJoEAl68fKBA7Y-g%ij&i{~he>m*P@&WD?DXj112$RL8!b ze*a$4d8KHkx+W~>6PDn>Ty-w}(dYMvKeEH^ilXHLu~>XTPkMKm=ekTfv7EOdfe#cMS|ByIWK zyF!0yMQD=<$n1Fvl*Q(ABz2lnZ+n*egNmAdMudy5GVqs{U@$y;bcZ;;pOTc_19|b> zs~30RR)ICUzAvaZcfP#rE9gMj)qxE1i9MvE`E(~*eI%V85)6jG=$}xnHf3Y^-hQmz zcK0mSx3oU7{sN`?*isrbr(i)hXxDUauP^z}O;3Yzk?!zuFmuS9Q@J56NIJ>qU&%N0 zhzCPmZ03>xM_RQywU2qD$%~rHyep$L2P$&s4YrQ9OmQM?AG1}KcvWJX^JItmeqa*d zlvjOAdVRQoQwh0eOG>V4LN!uRx?PW+9hj)fBww#rNE-;9s<+^%m_TN(m%&oX*A53J z4XMXF6z_e??+*mY(3bO}K~-HtFRkXhmW%GLn6X=KccWNS+*#w{zRb-@Y(p5bPIoq=MgyNXoZ6-p9t4Md!X+a#*p!X5S_so=p_-nm${sk+2~=Jt zEtE&6hX&_pi>Y8RuGSfX-&vd|q4vu*4I9CpB8pDZ6W9VdOLQ>Za6D99`0prRNF{sxtw(m;5A zpDa3=@vO_lU&OeK^{evOefHtrB05oepg|Yjh4s1%*VY&sM-u;D0v^NS?ZHxicu)HG z#%f?%wdC;Nt1=Zl?AKqG>IGE0uSntt(>7=8f{eC^W0@cM>(Vuqr`rA?!n*9A3{2gw z5$v$%_eC0u#W{{hG-4lZHFGs6D?kFWuNoQL8=CH?WGg_@{Er-LG9BNyMM6w!B=a+o z+M{PHjFV8Ey~Hqmwsi zUF#Pv#B*q#8cB_Md`OzhI6Wm;Gdu>S0xeaOwHw)g6q@s0S`g&@%#J{JjEW?JZvA6y zo%~LqYVvly<0#8g*iK(kS5#mek0a5U^Go{&9rUVT)gHPH?DPNC-TqBp{qx%X4qt-h z0Y9x!WrjjMtu*x8xRg_(%u&X%^mb1_-e0jpem?I1E#SwG``qWav<;tvP59PKSrN0{ zVQMA(h=MUhNvcXdZYPV2u)KJGbKiz|`?Zf78%gMoi`mk|gJGX+s>b6{3*^nhY^VLR z#>mR1v*U^fZFh10pDoGb)yJPt$z7#dbgJm@_7=P0mcAcJ`4ptK30uJK)CSJlD3c)Z)MWBCQrpXpUEZ#R%oUeM zU&-1E`G!I<{x(CwVO;|Kt?GPQZHvZBE6N2ej+xBve7Vo1z!u~kjIhr|Bx!7fS1MB0 zXFCZW6|gW$`bOa>T(7gLnSb7|flI!Pv|XK5M@bB&)Qi(`yD=>8xo!v#?4Ge5Iz?~dhtZ2@A?Wg=`Q+hcl#fIm@ zy}s|l{su`VPKwK-ge?C)6o@WBX;b`Y?js@8h#T$HS=4mtAL=2aP0; z8&)!NgO>%@noCuNHJu{66~e@=quG~;#lrVup0|SBX4^&wGMUVU?n2Rx28W!QJiR*0 z)#M~tLi2%w&wN2H&n60A*Q8YariPU5?ae3hLvGI6V@wUghkR55m(di#Eoh{Rbe9(} zPK8*Q?$=iKz75jdOg5TJ4|TF|vP)XK{<>z+jgbwdoJZ?e9Hz6q-HEMEf5YMB0js$Q z)BgEQRnkJ!{4&xzK?P$y=hvN?@skd*2oghdJJ!W|`=!~6dom5|M5&N!1B>K%WR6{pdB-{^swV2? 
zN$Dy~>C-OzqDi)W{J-=d)b(#d=6^rbd;lGUfSt&?yqun1X51vd7w|*KVUp7$&Oi&I z^>}{aHEYAC4~*KawmtOL|LV?bH`YzUQM`teqfyAHFdb+x2zO3K4=6`#M(wX9#?Pnu z^6nl9*lRQS(QTfu>8{y(7EKX)4uAS^az6&_94Z!3{a{?oB>mdebNg_0xIHm86%G`T z;crJPdB5|H%nuK)zg_x~c76I>lc%oGGa}RV)cS#LXo3E&=V5^(kILQgu{b0@vVFhp z`7!#Y&UDe#g@*lZJmI0kvXN+EepcaIL`d`fBh#z)HS@idqrywSZ@DWw!F&?)URaqX7L&_`qS}*J2EvT2hT>`FF4&3|a z5sHFQuN?!jVYQc?SQN`#&SyFy+_jhJ zQhBh|t{rZIlPSeuv(y{(wwpLvvA&WbIfaE>cJ3v{p)5T;&x{Da7o30fy*)V8xSWnF zGxcVadC1?59$Tbb<$rxV${_Z>i^4Y|z*LMedg<6)N(uSg*Kl+b)Aos<U zO#j_F==49IgXxbmgI&M=UpC)g!NAy98Q_oYmJyPY>MpL}zUC^F#_6j*t956AuU7e^RJLG=Q`%!BQ%f~9-QFJOmmndj25AtJ z@F}N1Rh#(sy0G$Z^WArsZ}7jj&%dm5kP6H`+nIdbo+J_cN56E?ot?LpX&e6bbA~oV zc;-I9ZighY&vH1^41}blh!(LV`nFPzI6w7Jo)(-Q-EPKI{~Gy0j}VZMW5%`Dw{l7e z3@%+{l2||ndi8??ygiK@a@MZ0DordVKZQ;HkT;U%pY`G%+=py%K8Z+mwj>3&l&HJs z4ZMyX;>Z1CJLj|Q&92~+PVT}LFQ~dx@ML(a5 zYu8visB&Lj1>~E~#_AGwm_C~A&k~H8Tb?AIZHkWL`w z>M_GWDw@Tb^L9Ka=QL8yd#p9ZrMtiTBPsSqrIRpFBi?#6wo6 zIYv*XZN>+kK|$85>Gcz}{j7ZdM9^$xmn|JkF=;^P(XN7mfa_8`_p8PS7{vF5vI&y; za7>m1e0bPSeh34Mg9v(6A^R&fCDtlc+ z11t%@yW_!hiGb$=FlK;~iVCN+1vn?-cW2|~R==Gb9xexZpx{x7s3j4A)~N^rb{_XD zGxVTe%ggP+*bkt;*gdipU{tHzjA3VdJd<3S$rlGF64&`-{YHwQ|_68 z5v14lpuX6LA>UbDz${o%HZ)3Yd#ko^&rWlS<`K)W>^-_F6{S1dtrJ6nW5-SZF?I4J zk}O{P|7I0Zlf+W|?(0*n&|qg~mP@RA*_tkpO44YsSn2d!T3gd?%J;ax$jr?AD4XeV zwHHGn0d!>kp7Fi9x=L5k(MJl%Rj)I-FW@3%#6}v*w@(_-_2Q}ab(-YwB35-sR5@bS zznSJ-rlXDfENi22wY?*SSX;XtzH-|0O0@Wpp>s`~e-Ns>b?3rb8t5{!$57)bF4!x0T#_a&$mR&4%Wzw2EpO zTRx`|;bUMWJ`D4GD*T4(99rKeAF|Pr^lt=qHYxYqZ1F3~MUnr4$+ohu;r(a#nyu$M3#Q$w^4)WEhF(egR?zsdx_Z_3 z=w03b=#flINm0K(n0dU~2c|Nl1LHNehPTr*Ggpgbv;6(v1bjHeG<$xywq0qx-kTs7 zO59El$+es*oyil6dU<|qYiZd}1ekCgf2Rb1Z*R`d#+9q&^F-G=Js$zuWUJF*dSQZO zK~qyxSXdaa%Mm!K%nb?(Dk>_9ii!e$YiZ%u)7PIUkisP;&59)Ebv~FLn3$07dIgx8 zC%{CI+mn?t<+4Ofd4NLa;o$)gnHB3yWH2$~IM+NB=OX^T<7ggTjvC9h^S&@7BqS=gQ;<-<+&0EiP(sx;X%{n5Nm`obP$Gjv zUnM#>*=hBepsFGqu8oGb3_V2i4g zj>6z0Q^5A+BpWN;l(V?&9JW}t<5cZ)8R}pDY``wx(*lI zwnNnsqQn`pUeC)4$i@!tLk_tI5@L0E{U*C!{ZD?b*6e3ZxJ~xuT`D>3lh4q}FM{A( z;@RozUZ^K9>jD@F0U8wxdpR@|9MyWLW8Gk3-QcWdSvIR|eORSYQBkI`q@h_x{X?@P zH#ijeUG#f!Utoy_-oG+s=(^hDHx(<%3wzW=WDW zV)ax?3=5~y4ZWnhqS2CNtf1UJBgdka=B>j`IBp?*zA3kLAxhXUXP*a;tA^`<=c5ckcc^}mM$BqjJ z2M3^0OBTfH$U{P_i|t-Zox*!x9HFv|%tF);y*2^kwh^8s|TuFe6tAz&1MhzPU~ zpcFh@Ts^;kCj=B#RSDWGH(xLS9K1AOV^e-U1!Gc~676RZ5iMsKD_2e81S>172yWwibM{QVauLm!q? z-s$IaQqsUJYTv6v_}6bQU`M9sFkGmFy#vysO(Xhu=;w*#~_#iXgZ`gD|ga?C0*htROW*bNYCJe42baARg8q-&hv5z_TZDj6PUIx~aDe zv_qkQMxj>=!cxZ~b~wGDtY8XZSZ0iWM84sI=&T!peBQrQd zesM81Ir-;~SmUx3f0>MT^a+y6wAfL7b@lbrQ&Wm#3d6gX4)*r+3=E2jiUkTZKDCY( zva+%%(l`L1+&9C=$N&2EE6Cp7K0G`e1Q=96BBA@o#>PfR8M|r#$pe_3-@n)Jpj;0R z0(by$6R>hhO7XF=28M=liHRoG)+4)@b#-<4fi?B@0OdnchT*@`&ebVlb7O;uh{(XepuD0&TT4qVk)4BMbZ7_$@E+%P=Y}9ySlACS#U8FKEG+!) 
zSIq3}o)1?g3hMVwVilTAjkUE3vN)HT}n_CRRL$(op7csMSc?);VOiZFYR9RU;Tih@m0Y(Nc=y+1MCi*C+-|b zC2^<|x*9GT+PFz26I2b+5Q0X5KcYLagf&Vs(S z{=_tmj0pwmlf-4et>QtuX4_oqY*(dqD8q{>(vXsr7(`xMh6}C$Z2r}3F znNvBr`!l6~w5Skc5-%^WYN}JR@xs%S2O1h$igck)Tw0o?xw*R2tz=0DO1F`b5kS5< z-}oKo<>dv8??UNT8c9zSrvPLmFfhTCPKg>TVU`&s?@!Ahe7P6oj~(rC?l+&tx;iPq z9!W^l03rhji6bb0BxRh&xVe3v$O*9VfqRwl*3E-qIqW+Acb5 z+i?^543(%MCio@G;Z7W*_4rRi;&i>Pal+MZq#XHM~FUgfHU zzXdr7>O?FuOJ^So)eF5pVm^!Oy2%Q;=pfkkb}qs#`ei!D?RT5rO3<7?h3svSZ9s2g zg(Pv&lD6f*>9)jM8S#R7zQaRpzgW@keqj?kwnCAW)^%e8Swx?_W3P-7Oe+cvNhyrqBB3G7(8v}fJ15a3MR zzFR?}ugd8|gMx*$L3Sa$z`7GkJNYH3xgycjzABu!CdBU4 zS;gZ>mXUY&tFu0Xn1aN`#oG++iWY2z6+phD-KkG_+dOY03mLQ{247MVpVSND2?=lT z|1jGa??Rz6IhetmnCcEbITvUzr#u~srI1@muCb54`Q_@DQO|Voa zrl~$xPNz2=eExlI-Gb{<@ZcGDvAGm0oJb2|-Z*{AGK;Bbx9WbmZ1(xpr?ER$-rK@7iR_F4%sVk^VXS;7w`k`Qq z(~AxK`nxAotU&l2$(#gB^t@=c{MGemjwSw;C(Qj=UY9DtDuEC3n|Q`l5VRiS8Pgfc z9_$rtB$Ou<=mP`_PcBtqAEmgsMlD-RVew3GD03yH1pVnY(YLmT%dO4R-Mb13HrX% zZy?CoL8upuD@0|T4&;Mz_cpoW*ym7@d}LEK?0V*%!%9`Ia+PX=5;3wMsKSS`svwdn zNXHxT{TD*Op927vl$e;`ac>-8U>?uc<)k zN}pYUH|GbfVx}{0wi%jZNXQ26ZAs@9(Qv5 zUG_FMjFLr3z%U?V^%a7izP_~7)JOvMP~(NIt*z!}u0-R@iwn571o=v|te-wjr}2B- zRm)}x0H_*pAp86K+fnN~J0_;4MM|{BpHf;YK79Dl8;*~Oi;IhoztH9FZEJhFv9VFx zQamI{D(H1|aKK|WP8J{{&mmY3;Qh!*)S+5{i3flbK>Gna51a!h0RaI(Ol*u+?;Lu{srH z`vfF8UxrZ{Nrcc4BP%hGk1#Li3S9_OI#g;E-(8*^I4~l;@5(#wazEX_y6PAM9}>KO zl>Db|qW9w*0@d8O&tYc4*O}hF>Kj`bX z&}bE6m@j+3Rgq!Q9e7hu+NtfiwzeTEzT7*U`UY9CuBN+;2TSPoWaI>a{YW1yn(+e#Rd75PRfZ&}mWGG<>VT_lt?H?eW z;hd45Aw!_+K-ZplZ-9=YlCnh=OCR=aj(KPgq~S?tXvY<>{yQ>wFWM$ihgW;LQ9%o& zVpG-$;VHR>zl!?uFsqao2LU!|@4*a1UHG6uO*`ALot|sXxa#qey?FutlnFr*LL8oQ z)?<>nyt7Wfu6R_gKj_2}(fh9#;JUQE)G&vi8K(K|Q~3;%h0s3%0Y5;ChSxJP+B*7s z5WRaBotgOpAn8DTwVV4VFNuGSZI`R-YXF-hdM9`x3a@zH(QmQ8exy*W_-G28J ziKe`%N{w}(2jX)*%T|bW(bL;rBf-YO(E+%tiz*eZk3i%MByjV+JpTL&kX%Y)>To{R zD43*Et&G>Dw=ApWpa-An=1N}NTm2X_RYt~=z-=1sXl`&kAB!FN+T{JBr|`Qg0L!H* zJXxVNJA9tqTqMANgZHehrMq*=`77Ys*a%Ugx;J6y{eJMho|3GN;HqgLI5}|>G4o+| zf5)_zT%AAYSf1`QKfw3LI_gqwb`TT=XVS{6+f&2@lf#oKyf{nQ%6;T>?TulLE}|ag zo7P?8Nh8&xhTr0*z2^UPv5CE#JF?qqlFbICG4AW_C|^obMg1r7rB#!C)lnqA%nN6x zgq+Av_K523&<(eZ^Cr4?VbJ_eu08<_-uI<66P?J~S!da3V5x?L6!y=a@#&uaP~?PM zY9?M98Wwg<9UH>Pk_bdu%7MY?uB)kkpdyZR#8mN zJv}HNmpK!Nwjvxq8O1}%lv3v(?TSXBi#pc)JZ`+o5=5a^mC zRpa!-t}-+!JimQBx3?65;?<+Jd+SPQhU^M;z@$3mYY9Oh>q33xFrR5urY!Sq0*P4% zY4qtNLpOtRAlLSEmdCo+!Nbs7LS$!%plpfuobhEp_K1L9E#0^5i#iY4$ad(igm;L+ zrN_%=y_-!MS)fD+?fTLr`*(=rI_J89XJi?FYM(8D6J=BKZMT zF#nA;AV8=&HYQ7tQ%6roOw5qP8HzTEf`XE-P@+)6#>~vj!NHh@p7QtUjD$}-!)T(a zF|wJ@Vxrdse7z6wVH6|Dz&lxQwa={ZV%IF8aQn*i-Ol0U={&TP@!sS~#0`DT2btLG zcMA##luIOpTS$~&1By=VM%So=6G9ztWpdGZ{V_`g8x128DSg$>6xzL|n53D^V<#FR zf2&QuT7PoQj+I|)7 z!F`64O<8M+2rD7k+GzgxGiWS}`y(9WVq_%=u-)0bs@a@tD8E&8$Q;|-S0-umy=7$T`mp<$hMr=+hwv4+UF^v@?S z*V>LT^|ptJ+Sn(N#jGw9O{6M!w5h&FqkUy)8B@wkNIbV5hABy=AEn4E*?5os6c87| zkW@GHT}dWiWnImb((tD{A?1?M~vY>&GB`A=>5>#$idp^54vx_R93S;=}c7l zFI@U3DNjj{ZcZVsC`eQCm7FU-ub#{lwB%g4`qvj@jYH2;@TrhRlTW(2Q9;EB=MRskZyB8 zr-G{sCwsNlcrxSO3*n8U;lX~H0X)*&xh5vAkOCtio)F&p8x$L9#Gg!(-FMcs*A+Lk z8dKLu?BM}15Ir&kW*8F~P%{X~PSk?Dt>b*@cpb7Z{is$qh@FOIf+dF2K`jAiu1)ny zGtLMpu7)v-6-5VbW><6azoH3%F_XUbIY8PJg1(@?+%QvY}IM z(XY|V75*u{h559u6=~{5XYahn`qK(mWUqE2o!FiNUj5!wCZ20b*iR zh!u<(U}{*bUx}V3&4epp*IRd$!era2D)3rE*s$B$@jZDLJ@YQCqIDME)uV9v?u6l9 zdW%$z=_RMR)4tT^0&8NYVUiQ~`W1T3^6=!(x(x6~en6|!zbzPNWt>ANcoj(;f>k`} zR9ugrGQ(>*UHBeD6#HC8FvFS;j2nm6w*^tnQ*Nl&*GHt2oA6h_i#Dvo3#9K9YoVH$ zb>m~{7&n-|*0#r^n7?nDJiojuv=L~SNJHdiwjKi5i)KJxCc5nsUmAhL=<$=3FU2?RnYJYL|bvVH(+Q!nEB zGDk!_XnU(AU)7=O?S6G?5AotI^Lt)sr4QG4Ls&??j6Z7Cc$i8PFKYZ&YpvqzbPIR8 
zS(!3@-uNOi9gzbvV}!+6zC|1mcn6u$AYcs{^SURizl>y&E$@pR!ht?*5rIFc$0 z2(SMG8l$$0q}tZ`jjC;9F&6d^o|9J1Yu=xyc?Wo}5aT0w#>eb{_t6goaIh}@Ti9u> z2Ymrri)m_lvx}CK{RQ9v-qm)zk6$bNL}q&p=jFou32&jl?d*IXSqeYl-v`tEToQZ` z^vc9upofQj|0Mz=n^4q6PcR$C1K~-hhWf1?s1+(oa<2IF;v;R-h{U?l{wAsrqg~t2v_4^~a zP;et=_CGb1U{RoK2ONu$KmNMuDmY}~DdDV2FK-1^WGE1I*w(v7=hGR(z4(-{)2!7F zo3Z=g-V4_9z#KyFjg!Q~z=v9;r50ikXgORnMi{Xm4t5|RMfOwfh3;K5Uu2l^%`>_C z1S|@u1?%@9?qlRgBcdMQ-2N{V$XdQq+?Z#7b5$FE_4g|i}eF+bNB}p27 zXbnD1pY(it{7*Aa4<|wq#?G`M*k&yWz=33amL5gd;B<^D zi{u|jvpgSU!jT3QJMovER3kly4%N=s5reu-;B1K8-ZRMSOhR|T)~I^pu{E5I*6m($ z(;H5=m8CInM-l5FaF5($x;Lqk1oI+cpgj z>Nwf1*F0TlpCw++o1^>hx1BbRxgt<@$a#4uqGfneo9234h_XKzdz9LbWADr%+GKs} zZ?o;89c_jdE`w5c@0lJPmu8@|&qSKnlSl*G#TC1jv-R$Go_E0-!KW`zvukLz_+)!3 z7e6Aht_Znzer0yyO>Fd+OS~-_1d<~7#kN2 zYDdU1VMNEl^KcUR5XynZ_om<10c63;nYwm2Lr**o-$|>@_J#+B-mZ;iP8D$8KrA_=Twi#Y5LjbJ zIWb!!HS8vZXBwgM$@P)T6;Q9LhfWU zxHI0DW=SK6;Wb%bIfOfq#1on54X`SZW!*wR+PK7=+WuE_B9hbNC2RrBsK_@#SuVUE zV*$R{E`#(1rr&(M%in>*n@<^-bY@E&D@_@jRCQbY1n%I{GxJ(li7;ig+2y@wsNM4U zKGvmhYwyG`bjE*WZU6VPD5yfynQBro&~#)!7!>5HQ4NfRdgBn|5NE~r)j554`jqR~ zmh?-A1moR%cI(XrO;{h;F25!^*{eI@Y7l4$ukp|-Wb15wJ$4INSmy_p%A;>4@Evfqaa3F(!Xi(`IF~j{oDay`3~mR#D&P z{<&k1Y!;vKFfZ4_^gYp(JjGY7E<9IqYT=h(B{m_PV?btkvp ze%Bx57lVuEP%#)3vwmJLqmc3;cEneseUsG<09FF0{6j}4?<%0)(HAgGR$HH7hgL*c_P4i^p8Mo04Jdafz51GKK>)MmN2ChI#Oq)Ud-4|;&2U5{=& zEKB+Sie~{3F^D1=8n7}HBm#j)fRbBknRx1D^3eqWTW6{(d@n&hg1&wbmUV}9atLxi z44*F!5Eff*R#pWP2yfoa7@z zM6wXIkM2EUtZjq&{b+2v`oq1P-Q^`Zfj$BVP$a+5wKQ@LY{~8W^gZf423~8@etgjQ zcD-|a4Gxh|wm@(SZ)Zll^&s6~$S&!c;kAqVz6-wOjBb)s)UbrdL)!NZ*u^|eE{^rzywg9-I3LMO zHT0q16R&h=Zb;Tn7HiK{dYu1$WKn&GN8(xwfmGLA|r<|fs!3_*u?X0)JR zob+Lr7KK#~I!*@1!i5hMm} z05l|njgF)Jw~~Xmnte3qMuTDd=}5@n$!uAj_BAgbTiTKnSM-Br z-kmvtC}Gqg=W8OoOsUtG6`ZjKnO_3M-jE17T<7p`&vyn;q9yt@@eyd9x8%bmdR*tQ z*JC&y9UrA^&`n6pp*S@busBMs1U{t%dO5G-qsSsNCwM>hLmVJ z?|h#8oqUOh3qexz%Wn$AL0~q{e~8kD7z813R_x&P-Vl6WB0>)3~rl41w$=D z@d!cLVa|iz%)04<(0so@psFXGd8fN7M0{+7#C+hfprd^GA*-Ttx+5Lq#Cr(&P_Jb+ zx+BlLr1PkbZ~TP_stM<35e~|L0(H3)J=-s!ErJ`g2j2|U>H{qfKMG}=w4?wOErtOd z^YU@nH>A_U1Fb`W^oSv_CLkRUT0iVl8AU$_@V;Vnwn$xuS=GbLQs01S zRW+R^Eh-HO_Js$_wf%YV(81$wZIdjGZPq6c3hfc2g2yET8gm&{tmFL!T0|xjZGsp0 zUbSomBbtIt=GWcY4D7!hDET&5np`PmwFbW>JAKfEAlsmIM92Hk4=q~1k;==Z-M-Rr z8+VazW9{tFXe{ET*emA8humT`Zv-#>v>y{7);y2Px;fsnA?qZK&IaLvG`pl3wjrD< zRtGe$j@{myQ~C7jH9t=4?MCA{8ctM-^7W~k-Cwq;!(3gNd{i2!nzBe0Z0oS>vdj%Ea{ z@|JoB{u|inH}!9RSUgawk>|Bkh|c{%&?u?e(P_?O!5eu76YtI30n#PKFG*kUAeUQPT+e9zEqiJHEqhg{%DCiUSQ{vdk=NfX(yWH4*odGRw(o99kX@c#JDR^ zWFtR0+E=(KJ(LV$1RA z+k`@)eTo>NdCtPf)`f;~GZU(>_YPvzehPqD+zXjoL@}71rqtx&Q-4lB6emmERd>9PN$FlkbJzL(0_0G zOJsE26qxYF>z%>ts`dEnLQ*TOAZfxfCi8LkArJlt*+19Ds6RF~Zhn=q@+K7W#%tR& z{-cR8jLOX{ioF_0?)%X(eL%kqZp~XTd44|8cb2h*-q0v491t=wJRu#~bpnyrRQ9`& z%TRqw>p?&947Kt}&IV@V0+-(hEs5+XND1Seb7Y#!4~Dm?K?f1o4-YVOE}Pzta&{wH z)^GhokM`=Has$CQBK461AFLupVTkEQH5c0DJ<~W#Z7)TytyhaX{|AOe2t?qcRJ3l) z#~YO3?sql?`0QL0(@1=@Zn1e)uSb>yU!`8f3;3w^YX{~oSV01n2)HqIzMk+ zx+%1oPQHB!Ttp4}$04;Y`;s$y%4 zWv^!55N2e^E)mz&kcVs{w0zzD1LJ=CF4#6mR8oo{NuLrOja7fHQ!FaBXTd-*l_W3PKZI+Pe!ShAM!OOj;!!BW@*q20Q%(Cag zaYVBNGOY0Rhk)Yjv1ZrH=1Cw^y4}&ZsXsD2fD^l$J2RmNHXmaoLF-7qyG>Y(ll!ce zLXW%XB4TUn_h4BaeQD|5OGEop9R!-{BUbBFnU)>ZgO)|SIhy>vG~x5~S(sI#O5GdB z@R*X4_aMeJ*T?TDmb`XyMX`jWA=6?vBqMryW z4$lc``XcylYuB(`cZLPgJlza#&t%t75Bm&Dt?*d9xKl~DlxZgEKQg@+#IT8>47tQOh-K92+qA>gJ%UTHE_o&1UO9;Xg%}xBSap$5lE@Cf3OWz(z+U zVNn&F@n4W>r2c8=g8s1JxG^31k(ELU;oq1)B&QY#ohRx3MTMz^Uwx^omOtorQFfOa zZItuTfS5zeLZ_0Ha$b%*G+8Yeity&4P+rmUIpxS?xp-meynT1*c7?wf zC&A~qFKlyco2!_~gJtg{se)tN12l)G!j{wex^1;SyJTffl3y@qBK^w1g 
z*k|UFg~ICX{n$$bHQ&YW7A*C>CkAB&>V)Kgoc4VyZff!`X8Vclj5+YhmCjk3{8>uz z8y3SYquu4QjDZ;O6~E_*O2%cALJNsvp|j%e zmxs^U@i3rUnA}hSk~(L!!%JJF&2NO`2fHpi(;lk&+cEUdYP{hquLhb1LWV#r8YlA0UM8D_XW=@6CRo*%Xs7__; zZRL8St&lc%EFC=)1x-o7s#NO4XO3_x&H96tWtCV{8VXF^Geq#lhtM!kz*!S)7~v%`-Lf!aX(GoXA?{ky;H|Y|gm3{u}1-KzM4eqccf<_Mu!(^+Nf#x4=dAS7^>2wEPErral+YH&;M|* zWg9t_$SR8Qna!^SuyK9wxHpnB=oh_&H0PahD+P8HftX+;%P0wrE~W81@vt zyRR}ZT-SnR?;~(V;)MPO>t6v+{s6SuFg4lQ*i<%le|+VpqN4g6a!y{2BdZw3#k~K- zR$u&hqvfC~wL&0uy|V0|H2TI_!^LC_VUu*ll-z`!p#U0tyxf9&-^lHaK49Cdhpv%u-|NV9p$|-1I+(c0xJs(P8cO^Lg`5x9$xB> zl#U{mnW3Sc*$0MURa)JhDt7B*sebP_$Qd}NkEBF5uAB3>(8`pyJWAoe%Oh|b4@rq` z&t-EY-C}CJ#)J=q`*E?_s@iM3w2l&O>TD5d6L#i18&3E75(QXS7u=d&9D*<}7TbH) z$0Gj zqlFP$=6(&>(8qafPkLgHz=()D{Ib6aAGvDTc(VE)s5O8~psCS#!L}acUw~E2xvmL( z$7~9r0B52=veUPuAtNu1M1<71z49LUGNQG43H!HyKnb4ISmVuC`CAMnCx(hd;pr4X zZ!~2=j+=CjNg3=v;3wZ?CmQ*#?_4f!_`+twYf%J zCTMXe?pCb06t}ie+}*9XyF)0Yw79z#cXtT1#odF4;_hyl+!i|d&gj9uQ_Q{mq?+{#GE%!G3+#mzWvhqCkPy91g9o0r&yZ`=~X zp8SrQDGVJ^6=eVX{P#`#9{~7@Hd;A8Z$bnbxH7VSPr;9XZ=bjx3|k4f?sz5+ub~#* zxTN0H;aC=Z&ie3{quavlumx-+R))+94)nPVL=ME0k!|>`X4xP1IfcDmZV36fLs-9s zOF1xO;7eFeSQuIj^S$k72XPF*K5~?`fq^@HHs+%JFvx1z%eKhRkL?2szSsV2L^SrR zR7hK+{u9$ca3De{lc~RRZ|Qtk?90-XJwkW$^Z(9}tv-@5@du@j!3qKDe&SczC~R3f zX?S-s&q`ru0R@4g3azNfMXQy#EU|dk`nX0>V;l)QTajnE!^>5)G^+_6i_l`elYY6#W z>dvm*fvDEXH49Mt!>LkMft{NGo;)nt}`SofghoSLCdyY1Rd zE$kjvaLkqYBkM+?nSIYz^`pDn^u+1ni@&+!ZOBy0&ouv`u5Vp+KAriXv1pvOTH8Z# z&x-u=_Z(zYJCX4R`-9Lg8d(Gv%w--wz>2gmTRs3;`Yp&-(x3+^4p`Q6+S5KgT@w;p zaa50Vr?za??GeqFim-?3fGUiDK!YWSP&wrlZP!GVh6ZPt(crv75J&o1?%;)#h9wK^ zntm7E9bD0GI592UthG7yG;R%F2D};t9s%hku|ES|dO8SWT}VjKbrAs5A%Q&)!m0f1 zN_D}98U>0yKij)@fZ-$T#y}MK=tj9lxYI7y(4Nx8YS+Pc5ug(a-V^P<>!?bMpR`*Q5+R;5rN7ekAFuc??t zK%nIXm9|Fp)3?FN$&DiK-F@v#{T}AN3-eMEg++P=?3NqNtBp)F>A zE1-%T?#N`rg2swxT>GN(m&p|*(j>|wyK?x%&-QjaA5XSw0X=&yna$_HtV*#lCN6wh z7N#pHm%(BU0?Y~*`1hSnuZ#9;bWK*PxD#sUExYuJcXH=(a~KDtmDIq z5okB(Lie}Nd=DLAaXX5ZM=N#FRl=6KT4{L1iEj`AT&i*jId3pl^#wgVyy1A1QAXm2 z)83Iapx<{lHYolKr)H#A#X935=wbx1`tXXKS8xCqY%YWNKC=a_JxbGAjBi&Pe6?jCrQS2Ac^*h1AQEZ zra$WVr{!)k^PskWs#(GGztxNr&SB!{k<8=d!ed72R8SyB{21~GVx6QWL23l<%vLqB z*hTykc}i72?7}#&LPWN`peTx{=43QnSTgez4LcPPJL1!Z!pNqeSddx|v#EFw0!Gt< zFZ>1_CH(%_GHo0z1>!$kmJlFr5Xi={Jxk2WXC)y}Mu7`n?%hrl-g{U4<8cAS=6#GD z208y*zUJ@A#NDb^_%)YF3IQtw@}_bm`3Xdu(1YjX&-r-8ivW>O$j)Aru;m2~>q zNF_(Mu;&HI^D$!lM13{<^TEpPWpL)YCVcG^V;g>=oe!NLpY4Jya8h|bI9&VKE3an8 zaPw%azBrw%=d3=eKr=-iDlEG^8;xkE_lltHXPd*Z#4%aC?VE$##tpkF2Dh81NzaP1 z4FfE)(Lbh~B{- z44ryUeV5m#S?6e-^sh7M{@&yQ1vppQNRv2bG&8ij(@~RGzg^Eybh=NO{3gD*`#@!4 zL*mM6XtgyBryekDa#hBLbHPOb+N>uE*p)|FZmrRoU~ADdWe;_1Z$$}HinaMdU;hsf zB@>YcIepef5Z@+(0MzrQ+3844`MLC$^T7#Zwj9T(06}aAqz|CL9^M|py~1O8+q@Si zh6WEc1OfzfNp+`81`x4J?KgV9@s~M@%aP>yqAIe?9dB{_m^O72L&QQ!y_;Sn5b`|2 zYg0i}6IY9|A|*AABk03yX~GWx$YOhb6k!@P@&ARA9r9;HiBzgZR9Pf_L~Sh9K=>N3 zx7Dy(l4jD2V5|EB4X#TYR&J3xs^Ri`YD&>yUgoonx|&bP&w3+GCy#3<8W5+ocGvy8 zW+AHnZMUelDx#Ml$K|*L6EC;rZtsiXZ$a%*K^;TnK^-oe^JBq*w}E`4?*X90#UM zxo^@Q$4qpbkWlVU41{`$4HI1{nVjw)#^zVoB!d1vo#V8-0JY5~Jr~z_Ax_AvXV0## zw+VTk%Zf{9%YEb_kO^udkz%c|LU-eqIk5j)cIQJJwEKNaP3{tRMX1x5*=2jx&1!?k zQmV`Z>KfQH2$lkPx>0@#zCZpFQ~XEZAzPxoa1$e&w|vVQxeG$|{CpVpOj%yM zP`|M(;rz6})DTd_Bk=V5+K1KO09%l~^C8pt@mi`U9cZ~+Zi`Z?jTgE-iQ4sOXy-0-$Yy-X9%y{%A}(5@@ZhumTRkr3Jn7CU<= z+B8u)%`@Td>w1{kiKS;P;eefA%^9>HJ_qSf|M?2^!KN2~?1MpjGDCo1)Ko8l{qy@U ze1&BWf`|B{d3f*a9vk%njFM>Hf zTzf@mGE8*(pL;bobunn%7AL|9AGfFewmd1x4A10un$Z4YHHLG1c%ZTUbH6z|Ju^Lx z}HFw6xwX9*~k^8p5QYtnwrmev=cDpHpX|6SZ=A^`sH?(B2&U4stptIKFECkz{OR&g@_LRh=D=F z&6rVjFRhip@s^dH;~RQ_Q)mmx|D>p_m>L3HZ5SCB;7h2(ffNG(!&iOU%Xsh%ozq-G 
zh(qiE8B3A_BwzO+ezWT!BBMY+2YH7~gm{BABo)@p`y90a@ja>UDDquzYIf30sv9{R z=LX+(U$!lgMWYZQm;l~`gj}jT0$FJ=(g3m`cS%f_F2kPjy#fWNnq zEnSNA^pqT8p=m;o0%$pod;b7=I;~w@gPl-zZ93!Z&v?&|1_k4 z1>B5jxenfHgk$ACbmKDg*#2F8clt;g>W7o|EMmPAF%)AcIpDt7a=J%tBG`Hw=RLkq zy}-3vW{zH0JCOudBWLuxrRu!m>7#w>=|LiZgP%XW{ENv2aRQ zD>GhBwBx1VW4>Ncg$!11({A0jp^gf5%T=ML`%2r~JG0)Df=7trd2MOF2FahBlV+7a zKavt zkN#bylRk48d`A1#emBo+zBAX&!KzM)8NS(YJbu_O$uT@+>FL0Rr3j zbf?E2HBqsSzf}|3?P!xrk=%-^DKb-uTB0F{^LFc=>dFbPl<#t3 zTyg)}f`;hSkss<&^8nTGpx~GM3yXB_jMCbcp%S4m*-Ny~!V&DsSEnZXhC9 zR2?}At+&OJ{5BLk%2e$o>dO^+9&9q*iNp*%38%P&Dk!@RmQu0OLYTRsX3 z<|rE0TWsjb`}$>l2JFhC9Z%&$zw~=v-^jo+3U;2Ej?T=37@H$^I@Qfkr4LIKmsQpx^nIzCHo1nX7HS);HQ- zx2ua^AmHGtoa-^)uJqvS$2xz879XzB$H11Jd3)%#CLwbUwhm(o!OrHidfnHe0*!<~ zMbcJ|hk15W=>Fbqn&j0@DR(9g$>-;@-u>?Z$#i488;D(0Vg^aaVZ#FC`WI(MWoH|H zqmE?Un)_%hR;w~tcJ{-A3K8ckl8IT8I==ESO3fdrH!y zydB;qT};KBIQ8!m$$m~&^c=IhxIZY1vIu_ozGvr2Su*k94-ARzY`OKjhJcq@GL(KQ z!NnQ_ey3rg^n9)bfUN3t0a`B`#08_>)Z>eD?hv0BJhphS@N@KTxir&<&*iBD{8fgH zUUT0JhHht*)&@(Djf&-1NO@B5I3Hgw*TXG?>0a{!Yqx%|q=Txm#iUQ(()}yDbvH;l zYS4OmU;I0_N#Xu_HR~buc1+piv+#g8F*Xald*6cZr&*V_ zn85HDYSl(wVW;{4o`jo;wN{YPDTxihKw4TOsdT=a>7BDLjitS`#9LDI>O@`Sd4nf?if_gxlCHptElXsE|xUDEQCo`YTC9jga3K zTPo+W=`^np{<+ZJW>^hJP0R#*N;8e~xlp=BDrq$W={j9Dj?|IQL3?fYnvc9DcaD}|NCJIp-oQi0~vrB zoL**pSo6{*5Qzd{M4h@-Y53?DI%!JI`VTn5M|>FO)mrfWE>XjG!tA!^_o+%hQe z*mXhxC3`pAJ34AJS)AFG1dyx~6572Qb`UvTa#$;uAq{>yl>W&{?AUhac~kYA86*T# z$0bPPA1g}cZn18#-gOp@Q#4we+YzonJY1|fm$&L3<|mX)&PjTTi9}Ewd9n9a!8>|7 z(2YVc)hRjrd^>;TYB0j@Fz?dxQy~fcyTs8VsiT!O{J2;cW7~&iQpkNU`{BN`ZPGbAQ z#K(i@nHS)cbl-u3eK@HPFLSr|EgHVExJ0skK$%gQz4KU%+!%7Aa_2tmXr%39Kjs?s zMz+3g`9@l%8&CbExJ${u8nF4vnue?c&Fs-6_bmA#Lw%^&d#mkq?(*m@xRohG!F@SuVd0|)Rb5Hh z7qG?ckpr1%@MT>TP$G-mu=Zv7d3UI3LH}>`jUi8~?_D~3iG`G#rFm<)mr6ODJ=^j8 z=NCCzTg~ecNO_n^L@W-n@7Y=|XU(hS)(%T<4=m2}7FRt$urxPM|=N zjjAj{V>20QCD09Ye`>trZ;CzH2+n*krYmpM)awkhLU8NB(M1DH?)d9yBkc6JnOdKV zocFw0r(P$j#h(Q7+iEALMAba-Ly9#6O>Em@o&B}(r*1Y(3_%7xhU+Rc<$?xRis=j= zf%c{Vmzwfvlq%X7XDqt626m`l5pEAVvbCD^o{goSNy<30F$59BK=2yEw@$@HLX-Oa z)t9@k-06x&tKU2yl4$I{j871jMJXwf$f^O8z`$IfWHS5UyrX@-0&OY^iB*bKIa^5S zXd$*obq!>bVNx@vmCV*2?<$}gwE(m z=UysjwAVyd&^x{1gW9K`)^gW<-jtKRdW*=vx?|Ga-#!}=7ue(}ez`DcVP$r&N1l=) zQhE|QFxoG2hW=HRZkRJC$s|#eMeO0?=Z)Xqk-g|m{@TN0=hru{y%c>Qp?1B~8x_O! 
zmeWF5@xk)&s=rPr_KC+|!{=t%Nb)?J_^Ek*&3>`|vIfK5Ga1cQPJ1k)HjWNaS4p2Q z2tPLusmDF7)7?I|M1t)}#4yz3kDagm>|R!7Wn-j4xMS(nj~gv#eoJ8{rlY6kd*Wbc zG2aoeQiDm%D*p2oseRqDDSbHc?H8%|Y01mrbPiEfJG(f|#!bj{{40_-N+8ZCcf zcO3Sozsl|9Rz@--?wf%s&@A_6cUFEm5aajqy&H;B>Q2@ug=mjN%?x7MCF{^pvDQE)ECMlgC8N>t5dfqUGhqT)vPED$P8`BrqR`|Mz z{JM^SLX6)1BgaP$pJZAfKt4V_ftJ6~=3p`+J13db`Y=%M_;x?7-g4o^<)RSz9}T;^tw~dI{bdrax1B@li;dom)5*JU zn7ZP5JjU3_nr!>f{P8m7u!qQrwhy-kA#tSoSEz%ePb#EXIqu8zD@m6nushbT)mxa= zQ`wdlumZx13@SW=H^?Y!SVd54b%j#`0s%CIqX@rwT+28eTgf)==GlI}<^`a*bby!3Q`VE!(bsMN8m*LPOrw&;4uSTEDkFF9ANds( z6$O9HK3wmtWhWVH$h_NC>)a_Qz8$%|4lLT@qi(oPRjX;P5+o~{otm4Q`wO;E#~&_y z?8H(KW3%_>Q6}|&{G^@RHY1!?3n&IhCuZBGqrY2lh~Q$QSkgb_$sbmTkj1UnWs{^L zI$N%u!pO*ok}5Ob6~bL5 zeB)BT;m)ku*}vK!JkPe{I~$%;n@A&$FdU_6HDEi|#|!Q{H9r^kn_85f4I8xCv_O`k&bKhyd@X@>5-Paucb z-KHIw*0o&E6CuJPZzN1N z?KVc^55XJsw#JO)pjiS{f>|)p7n%cLI#Bl#V2lL)B@q-L%!UKjF1S z;m|8%00#l=1ZfLke7#EkuVQ?s0A^?X_nNlE0+kKW*7NdQ zC`^uErbx%Xd(iKQx0$?nLa zy=dx&>f1&g)v_+9P{(K@_=1C94(j$>6cBl4kcM!cpD)>lwf+Wg#hM*@<(@JPj|BL= z+~C7S8}0V9Ixz6meaBN2V#|imYu3CtzYvZp$g;w;0vcB!TVItqdyO zoQo|-14a;SU6*7b*A{Qi+kj+)z6PZ^0Ru|Q>HFY~@Z*lUp%kV7*PZY=0e2g+t$d0w8ZUDcteuveO*X|R9pjmLc_h^D2)o$MNGN+lO-S03)dxs*K zP2ju?vZMv;q#M#hI_Hr0(in-isATs-#mY1o~?bjOc<=}$OI3g_(o{6%MD^Ea+_ zVXtLrx)zUnj%sv7XK*-)*0 zB86St;4RoFiX9W&VSTbHMyS9dn-c(SK6=C6KP9s3fApUP0X02Ydx}ye^ZPyWurA-l z+qMTk#qaL*3e&UrsGDlvkP(nNC>ry7D3KokUiLYEwZLRG`1lA{i_CkI!RcDE0YA>)gFeuEA#rY~d zjEU4Urz-X}Q@C0!;+}86n|Ve;pES*kBH^>rAJ87V7F2(~@eww%{6sgS0NWX*7QT-i zQwr@CJNtS09WPX$k0+0TWhzyN;vCBwfs2aAXL@Cv-Qn7Uz+6q=ZR`GUAeNE!z)CjC zFIKNRAj<&5`2DE+Y2YJ{k&+f+48erkq`(YHM~jowjBIWNZm(!}!|D3piXVMr>S{ZV z#s1^eD@{Lya$j2)_93r|T+9ruS8jyt=-t4q)_Uqk3fqu#X?<-^k27`fDY% zJ+~G$pxoNpI`*^5Dq?ncpPU-qnn#XD4a0v2vnj5NtAy0l2 zCIV2!y&H#mEY+;AZ@(2AIjQK!JhyQe#ARA*3v765v%f9^2vW93>CgUY>uLjPWDyd% z`9mzB)*i`}>I!e|A79)%+FRkRqO1ndRap`ykKeKTI1`% z)5Re$-r7W@1d#b)N+CWDbBdx(qGBVU0UF%`Lg$*>_=^r6%fTB}(*rGH70#EjJx-FE zPi}ORRB?z9AOHubV@l^j0x(7NK>FN6h$Vl>x`qSMSrP&_{V1KB0I{rN6hijs*ZeyV zmMep5eg;LAQ~ct$bdnK5)Yar?ZPt$F{8zGmu0X!M(eJ8g&Y74w!O1b*wc<^DA#=1Y zgHOPECO7J==HTpnW3aK*2Z% z5zzZ06r`GNX`{9`L|u{-AVdFos-&BJDBy= zjz_I9J8Q14Uhv&f`(;H*7V3LkviR$)`Tn`;Y1TOTHrj58sloHFot3886GrV8D_iP` zV-3G$gUrK27eYO5TF8rU>5O%)zmC5}HJt$CS>tXsFA#qG73(;CS-wOfXMEAUOIvlg z6na#_#L5uffMNnnua^97`h@{eI;g4RQ2f0uUo(XQi$7raj~{CDSVkTpR`GPS>JR05 z> zx$AOR+PJZ6hQ{OZEqYS zs6xmq7(tdrxDsLcs#i8d$1Tl$8CJ&PP}lhMKDzC&wT!dNn;Oh!zxA^EeL_^G9}6bH zR&#KqOD23%l|+n#+CFwH>>BRbYkOKT6!=N`=+x_q6B?z@kpH$-sa?g%{*3RGeU2!8|yU3EsND6=P!%l{)D$s82BUSc!M7%3GnK$mjsB4DsWE?J;x& zD(GS+!#l?vez&{d(9&a#=hq`*U6GEw-d+ZJvsF+8b&RWwD^ITV3P5q;pY@vgj!YS` z&aPV}aRq3lC+jX%v}Y_MMx>4f(H5Gz+gEiIhUBf{!bps_RKKl{Pn;~yLQz9vq|esZ^w5U zW2;+6K2k&JwZS8b1AMvo1IopdpXQq>A6lDR-pYY?V#M#3A(R)0oF=VQoZMaqB1g;D z^D9Lrr$6RsmNlf)Ta3J{X1Hc5dq#9>`Y>8|s{EozrmnBL7MKCQKWz$}EUd>&W?HvM z4LNjRQ9t%+laGHXV=^JuQffE~AES0Z?NzbIIS^EQ2>P`$#+!>JN>YSmiW}?&87@Il zbPDGTA}!eM3Wg?YGB8W4x0dgr3=aOm5aV{wb622F2G!37rR=e@mjg=j_Fn7 z4=tEu8v>>0E=L|dUWw9z#dkHV_6lrS<=J&I6*or|!1~3o)!Fg`lr5oE@2!6qQ zS6Jh*87y0ErRHNHJY-eK00%`?`}iW zIW)-exJ(mTAqU7TzM~Z2yesGA9e-SFdzD`OQRLRv9Xti?ur<10CM1b|aJK)^3hU%A z-gaX*LOIx<=945<&g?u>I2;uLr^cdfGww8xS1mO|u5Z|n@vjYDy`PQJeu|b(5=L4; z%!jjYG|w~9O_>4fsHW_3ufBu&L5MWItOFj*T*!c0eUaC-voeKX4?-@YJv32-=@4TO z`kg_?rWo?ms{qt1fE&=d2mK5Yol{&p&A^T?50UL{P+8dKV`=$+v#mLWPOXQ#X6tLm z7lb?P?py##en@Q?dfP|13_&OQU=jVIdz`n=75Aq@dE&O=Njifz& z!`c!Ed>lFU{jZrvv4o7WE{rpM=4LCwV+Xfy4+cty@i5`qw1zz>HfWy#Iop|4j++Yq zoQb5mHkz{Tq-z!~6HOA+?cx4^-*LWv%dw3tF4DQz{8yYY#Zm#Es_&C}G%6w}sA68& zyFyGZO&Ndp=E9zp5q7WsX46Bz`FuB*QFQI;LeE#Kik_Q1Gw-GZMFI3wiaygZ!_y1_%t9jWSaW& 
zD=RmjFkOi6&t!kUW)9JtIGunjB!*OZAh?OeR1zYlUeUb{_Z}_&SCRe(-T|IeCM1xoxZNJ{lJ3t$eWU?V35ocGO=J z`*DfH+tu54i$(@TCvgrMmQ3s#wBr8$!}-jq@;Blveg4mys51>g2*|DHzheKOMjdWW z+rYXs834LC9(I*cBFEXNTw7M-o4ZX(Q1Rx#MFm=bXea3#Xyc54SlP2C?O@>x?$6)~kFAV*hHm~HwURKswtLo+29Emxo z%#GH&KA$)L{CrpeTXr_rzT$=($a)DVw$^(LzuV$ zM-iJKZ$2!E5qy|n52dx+6YT)o>gI5D>}?+(3XM$Q9Ez_jJxY@S_M_Lo*9^2=+qf=q zk^+>@jUM^U7*^0f*v+gkxA@zIOAE|G}CL~5RD5i4}k+h z-U6&yst3*4f8Tt}f0SR|c>cG-+#ve_tALb}z(OD-q|GuV3&Q~^3#2TG?NX+wY?oYg z{rWWgkkw}UZk~ruCyz1?T~oNpkTXyE{FJCi6429di&vA8G1s!#P~H{sz^!gVhS7-N zBPqU)Q;GOAEMTzQ|F3z?_lMsOVy6os=}J5&4hC-d=G8?1Bx|PxLF6xUM82>m!&8$F z2$zC8R)&<6D2N0j-z7FKKZQXOgxcSACcPPv9l=ur9Sw=sOtQdrg1#1R0pnIgJUoZ@ zbB-19G#Un^ZKdSS+u8>eR7SPGghUnIMadHyr#cC00iP^6A0`ps$k5$Or2B*?cXa@1geLzz=?0(+60wtMlug+ti;jgQSbqL2 zAi(1|C90C1K(Dp0&)3BL1)U&t3@}Lf@-7MPBw?6DTIrLWT;O1Bnm=V&LeTrufd%ya&+_eAEr+E!u!m27<=XZnmc@Y4^Vac^=H|&fY{r`v-6m6 zT)ob*y)BdVL*N&T1}gt1`w~vfGi>(BZKC1~HQM;|1Z8%^O4p*5zh)~S^CkBvptUy;Hs?p~&A{_+}e zTZKeod_Sz*7BL@eJup#p`FM2bVuAXMuSaYOyvA?*@TU4qvNldb>o0qp{7Atqozh1? zvH!}5Vx~sfLh>mC5Gx!K0&lC8w5Cc_jK(>^)FD5^7D)klg$^B!4FziyZI7L$)L7=6 zt5Ml4t&ZLz!zEmVnLlStP>2|&4z~IWcQmR-M;EtiZXi;R)!0#(P?6hGZj+NFc3tm6hv6_l^vRdWv2C9bhuK8ra|H&pbew~SZVkEDdB;D$&!cJ z4TM_E3SaPXSuGkofFPErAW|hEk$3jJia`aHTH%^gMvJ~h8XSl9K8r(gWl3E{+!(g# zpk&f>W=Zi99T6EXw?c{w#^Vk_?$@zM3~zyXTHX98$aOG31>JUOp&~gTce=(>i%Y@- z<3Uo;S334}sLi>7Z^=dgLsVvqoB4nQ>xm&2qZHG3eLA#B{eES~eN`+wS%eHgEJ%O# zR!awOQLE`8FwZ_Tg1CpwG}=l*KQ(xfrXTXqm7=|JjPq0|4igkQ4G&J zXGgx?^D0Xzm0A9#;W>MX8l&h4^fG8gDK-6d5v90p=YNs6@TYSU5?)#^hX;z6dfu)yH|y#+{A9v%Dw4xv2*;^%o5 zf`eiA(3mhCm`AtI<+Ma9b-z|1p1|2(jQ(MM_H;ksZ6_iaHr2)XSf@fYP^2ItY_2i~ z$@!r|IuyeA4Eu!)3zD?X*M3p*7X~?p5HOs(Grj8BRVgOl4Df*y7zKhlR?{q4H`7>=r1ku4;otP z$+EMyd$^n4dItBRGKPLMY zzG$Vfvw4$ryNp*FJ03?VWGFJZEQm_Vz2ETP$(_VDQ4%|I(NsSg89aN{&1)=98#eTd zUwelw&EFyDMD3-a_d$z^Zf1Q^cUiT*gIg{+s_cuSy%j!?C@ zI;oCph8OxpoCVmG<2jt#U`B#gh@Vbq?e?!2=X=jwg-^J1(y#L*K79=F>VaObV!r15 zk=m=U!Em17BlllO{u72s=Wsm-4C)oUW-_Y}Lxl5HW7ltu7)@|53U4%yWwk7;-%Ro_ z1B#b0D&$#oMaK;$hKgyX1cl{Zu8Z8Cv-~hKc^sw|xh-O<9?v1KezZrfD7KfnMG!r# zw$k34nd7`8D#YV^I)>Npki52C-(32ku|3%iEbKnR8`8~ zap@pcgJW4Qh16)-HVq8``t+kGBbbGtK4bKAaiug1faYbvWS1%lN>})^7pL=wE@yg| z%{Q|zkTLuYjj#`4Nvow3D!CoWMjOU1Hp-99hz@Bl(@uevx_GIG<_fp(HT5UYa}m`? 
za+O~Xq{1+hKN#;H^S$?OjzxDA=^3iiz%Qv5Tf+8m_gv|=m3fy{WBfmH@fyyEDumlh{U4qUpAf3@EYbf%vzdR868{Gc1dw?c=VGh`Nqa+SP}wzDMT5=g-EeKyY)n zWk<~kB@8}ITY0{1@}3i;(L34Okd!uw)<94U8eC-EA3(*jd6DXF*>8|Q(1f2H{u_YY z&DJd%W)#eHc1aJ3@R^?MS*hW|==k!tw55~Z=;+@49!ygyySP9<==e*`l&%is>fsRf zD`*q`CVYCyCsLI30hvpPaLt=+a!rw>)%kC7ttdJGi!zdPxF}i;SE3D1#L^R0AXU&- z#r=Gf_1C`i{`i+d2jr0q_J|+By)%`B&Q0=+a_u|09%E2)VRi_o9)BZz7nrN#W3*M3 zt}ld;FU`s3a5p%poQkOGMY#~2>tcER<)fK&TUHb{p-40XI>rV3`3)4?we^pgI3tc*|E5|%>4BFE#pJ|){k^OP*^lWm;)I=6O-KD zox^P!=_*{TQ)!rJSeKnluQvVzV&1O?$Irmd`#frW*=i)+%7nF&=+<^N;TCLh-Kt8= z_%D{WcC0cK_yDZt{30ODWMNWz9EYZA`*7Bs%om(l(Qh9Zbu?GrXc}f#Ka*|#9SW5A zN}$|TIXQD^5Y%CXZb2o^>~j2f!-n%+>b-z;NU z6qc=wArc*oG>a!Bn4@zLn=;LG0ee`Zff>YyZ;%IX&XU_C+ z)Py8O--}$+X|og ze6$m9ogeO5J2nnC`;TR=U2Eqp$@hen3>rSOot{h`k}3k&d>2e$;&wc4_Zw6@wB7-O zgxo*n4PgFv;Nsx<{WjC_ES9RPJ+h=70ZMMOwP~=|>EHN)e^CK0Awl6+8JRBTrymjM z!3?;F7e^2?wHn^SFomQc$Aa{4^YbYbzE;{!fl3pP@goZOKlFC^m8A#xDR8U@ZZiO5 zagkr?h9`$x&E2?LR>&{row^L)7DvmSqIAdc9h_;I2xLBe@4(wg_!7=ZcY&G30sMOB9E2C%p!panTKx}p4ktI{A?W!rtbu}Cy%s| zvw8d}<(XxNkaSNk8Bs|GEQezvQDa`X+l12P!kMM5Kref2)u$+N@c#OL%PO2$;L z_!Ju$zD633Mv>nvxUfM={SDEo24MK^-i~n5vc~d^<>8H`l z{T$miLpTP7XsR$TKGDf{cG%Z-2c`)Pe*=UD3<38v>7iK{PqtX7!N*B2jn9A!T z-#fj=BfGQg@N<4Qcpf_H#mRIHT;^l$A4)+6@X@xc3fke{LX%5~i{sJlE z2ak1l3n)0P^*Va6hI%k~j8u&bqX$xOwOdHY$P>7&rE&=6n3tgB3hg!9?SDOu5xHAc z7!3Xi*c&Z%fh{c4t|839q7dGjJsY|9W z^Eq#>te(t$qcWh@S$*?oGO*jo9m;nEfsS%QeJD81(_2osCm*IdEn(jQy(Yp(B0hg= zn}Xdr)}quGy~xGq-&1)!T|FV;nxTj2DdCc9#s-b&0F)Q2YEVYBwLc7d(u%=jK(CBVoLoh#-FSCHb%ZOyuZHa4F(9{YIAcR5s`FE9Ht-| zA)zaE6`AxZ7ceihDDOvOC3%PhBhBw2O_cXUG48q7ZBU@0_bw@s58^6NW%rmU`o~QK zN3%Gt@LOX;fA!ZH7qhea7 z?h*7S=;(}~ypVf}6?K@WBjk&F4u)!TOZy#T-yeSgw|S2b&;2=qPL@o&&@rK;x8j~_ zhxQ2@bP9*JjcBazJ@G0|#^iU(x37LR98Vs+TK9t#_9{s_50ELDJ@|U$beJ4&ULQN_ z0|ot0IyzHZLa_7;ZmXEp{#fPSYfNh7#`B~&e&$Z-eaEqPs8cZA!HmJ?S^R6V#K@G{ z&zdhU9=ayt9vohW)lZ~czSyOIo}+O^qDMiU`28=ywmgN$t=Fl zmB?}36_&8wOv<~*x*rS|l#97ISt>__4yE%u>>j@I$>X?JIFvtWXOo^tUJ4q___jfLl(NcAUyDuSVXKG$i`i^**JhuTk*jk94)#icR7 zCVyx69~#E+?Q{CKqjhBBl(7&ui0Mg>|E3q(woUX@JZg8y3gKAUsn;K$FHz z-SI&wHZ*q8pSRqs==pI&_p0a6vHZ)Zx(iW_k#BL#;rjyG>NF549JDb-No-sBWe6}2 z=FQ&E*N^u~M&AsV8@*V5!CFG4(Tdp-J3M@R4-)5db{ZQM38xf7@C)*2zY3qWB|NSO zDkQu3{YdPL^!4_KjKWPkMZEB1%lSJ-XC2pB>RI9oJ^CvP>GsRpr@G_)^58hbL`L7M zT?xgEv!*xo8Sx5E1-RJ-%znf7PC#u@QpY8I zWkftA;l%{ill$dE+a|1H4If9BpNA(4FPiVTC;HGL8&?1(`@h?FRyZ~}X|qscH9P}s zucP`{OCc}&lBZ|qBI(09F4t$uB363(g0>X#xY%^I)r;Z%W;~2wQ7D6wRNY$9c$~xu z9$eR;5{`?c_+YDkEDFZmBf2xk&F`ER%r^;j#L#b(a;CC7KrN<7sLg&=I&|V&^%_C+7U{U!NEG(bOPdCP5 z>f@c^VVo&VYuj$Ef2cJch>GIm`-RUiG&W8W%ZaG@-#oBC^SQb(zm2zaXta$OzJZ5{ zv1{=`dNL64+$H8S$jmbx-6Enk&0Pj7?1$aKqG?6t>+M-aeK8+rRSnt&8a5 zW2#Z4(qiQDOtCHhdb}~6dGdREAE;sbLkcdBWmk`wdva<(B?DVpRwnNqK<28h6n;_18QdJg zeNR>!^_k}JroplI;~tF^aFm3|ULzz1y=ij)*@Cp?ZUd%`J>`oRr?j!EK4F%0i7NQ| z=qgo5ON)zrIgwpVg{UFyE*D9OBO|#~SPlN}l)bA+!Vk6GI_p>K8TkR(5ZQ4Q`ii`5N3bgHeLlsf&0L(ULtxV7_M4S zBK0AgiX*Ke^2aeAAqvBtg9V~q*s`>j-Kl_N198PM`vPfJ3$UOlL?He7QS{+;^~qRJ z9Xv_^X%x5|y8r&_U?yGThkg8GNzfE5KKPX{)dt#d-E}0R!Bp>4QNk1~IC+BUzVW)o z+Nb{sfQaj{xyn)f9Nz?`c?JGCdGj83{11mG93>BP{ko`dXcBb2jZ zGGi#_O1-}xWOL9)y721;n*t-fJ5wEcd@ky8)*Zk2kwD2%7q+Fm$jL=n$Z(;-b$ujy zd-?n5SGa+n|M3$fruuzQldBNBmTTYh`q7R>1Vz&Ld_$WgEnqyrvrjy{&PQ=DbJAztE{1{=TQj2+-C2XJG^;~IO7q-UZ9%cwR?_0~ ze>l;^={KJk`$atOLJEa!M!eByae`+<3O#5RG24PJ>eX14&bt|?IBFQ8#xy{dWqfMs z>xfn1aWW7%;cIm01KQ$S#c*~(U2yCm$xM+`cuE96Fvth%;l(C=n>hIxm=K8@Nsyr2 zkFWxn9DYxn-H0{{&33ZnQjp`tJ7jV=)DEPI`_so&c)|q#Ua~&q2y6WEN9xvc(ODDEHqEK!5Rq&_|`E`PE1!#nTFAMqK zmzA2_bnKH#qe>zD7h#;Sad#U#&!LCqm>5rXeGP*9WUv;S$Oux7zm<>4RtLGbG>Yph 
zs`VKMbH`@B!=l@e0|5YND7D9CiQR8NGlq3{Lub8I-?wy$(JtE&UMl z6Syi(w1;tE-6$0W1Rd9#r%L%-_Qf!Y?D6i3U9<=ZF_TDge6<3b z`}wId;GcRwGG1C|JAVRkCPtb<3l`EW8uv{f7}lP|t%fVTakM(}A0vt154_c)PwOk<@s- ztnR8uZ&o`$GU7AhnV`Kkkn70gY$?qzC?mZ&sjhO{Vdg%= zeV!rry+tj+|KcHX6kgjJB_Ly0vCLw5Eab4r!93?$z)%yBkq#MWSsgxBx+*7%@t%mf zJe9pEBtPeU7x=rc^WItUqXZiO(B{5Q#y>tt*{&M=5dgU>@!8hg9nWkE*`qO6=+J76 zDUexvstYBV)o|S$=m37{Qzo5)8SnsMVdAc^J*ZD)3hp8IGSYD`R|e@&+Z3pmHe zKEppQEH!XX&*jm%!_lH;>hmNFm3xlFCBNPZHD5#@KOVJ}<%r(d#BwY-em@md53z=sJL%r_XmaHmF9;i2rQ%<3V$L(jFp$-} zJ*VeyS%PT99LZnZ6-M!v__mi*6KO|c;=Wk9q(kN;#oQ!G%KK}^xZe&YuCcqT008Bb z-Q-BIer`qj!10z)OM4KP_6^rTU*BV8ZQ4C|h*?j)f#=g_;oe;3k&cepZRV(xPJ6Ch zUol4Ul;k;kUOWv5-^7tI+Q6Gt=wBp7OL-%1!yY9ii~Fkgy|iSv(I;(51LgR6xNZ-#n$vQ+_AF9Lw+?LPVQj`sc95&N#w!1S z$Sx1s?4*eg6Dax}vIL_MNdNsoieAL=#FuYNSubGMxX0|pDQVVuAWsz2n~9eHP} zsIAQK)|%U=Q-Ut6GY69eh~#O*ey8I|+39s^ews_6niUN9=6}%oVG-lscD09zcDl@@ zdhSkkJ^Tn_A1G`L>Y$>nD5CL;jHb40I&QAr(LIdbtP9y1a*9QdSE3W04tAbBkMpn^y`a}#gVZX0pRy0oor`w@q z!C?B&M$u8TZzYT#YGJes4nuTSUqMr|8)oy0Sp@%6s;5J`9M*~GR)S67zvrAHKrshm z7ZVn+M%bw($~iawsc>pv80z;|t;$t12)-52mj~m0L?HJXl^2($PYV;qzCU@7CEkkHj;o zlK-7Q7`F*2AgygGV`k=>gG<8qWG$!Ilw<>r^_zOw>vRnV(%U-fbck?Z=2t2x347Q z2mlq`lSJ0$3t#~-F^iJ8!htTJ!S4)?sAaKF`ND9*D^GYn;;UD*X zQc81{SUWB}WZD2^2;x+J-SEGK+!k*8keboN5_;Fg7`ZRQigH}n&S`<;0rAsk^b zGIwy4zY6_S`b?6s9D1)q@S9TmGC=PSg?@pq^IVO(8krYS?4>qegYls z%Q(1UhRe$R{Q2D zrKe@rnZcH^(Z0-BlN>0#ADz!xJ_+d$lv+7yR^EHHmCYmNP?O6df1HQo zu>6d9i;Q^rV)*uX#}A3eRrNC~AkF!}Jl|xSp!iS==2Oi-_KMcS$(9r~S{a^^I$3J2 zax$G>A^4_^u@YtABR8kZ(xLCbvKeo07!&xZFu#!N?G$}@i`T2j`((@cuI{nQx$Qs0 z9KgHO2<^`6kPh*UTx-6myrdJK+`?B2kyp=~-t+?$iOHkj5u)d#<)o4RA2*wtBPrFE zvdaUxw2nnQ%XKvJbCnIzlZgCJj{*(Qh!QWl5;JJDGg75JE`1Ncl$#EviRbWYl~LKJ zlkie66Ug}44|8x9TzfHnv$BZ-fp$DjCAAdCgI%RIu`Lq5UbeOvfQ4Z0d;7hgvDXj# zByfqIuEegP|SDO zAWG4R&U(WGY3$TY2w}gh=ksooU{e(8NO(wmMqCc3~Z=eR!XW3=IT&`b<69(z13wRh>JdLw&sz zjk|aVhPr$nj^!R}imLl(I!SWANWk*H|P5>n=a5gFSbMhPGxfUSq zVfJ;p6u$FBB@mDH6CSds)Z_kXWBAN&I8w&<`C4l;Lxw!)baxGp^MN|l5{QL!+!lB< z4$2Q$pSpj{;v%H;!FJr$3g)d>jn*D2`anjOBB9}+qnQ{Eh^=(& z&}*Dsk8c1S3y%8=`!Ne_H$EdMY9UsIQ`M8~94~$#ZA-e37#j`yf*LUR}xK?AE$rrL08Tht9y$t~(D26lLp0 zWml)}TiG`#)^GWl&u061KbWU?BklJKoviyhFZP)>8J1R`+}DtG$o5%WA}E4q*Tpt%E${9$J&sldFslF`YSb2j_<6?x>;m$tX4m8|{H`G1Mvi|!(+nCqAbn;& z#nrW!;C&+%KR0O%S-y$-kE0obiXK;6>J*SDfgCj@=d-1ZSW0+>v3beb+$mUmTjdHd z^tUj{XPO6SY9YidN<)*QT8U~SwMHitU74RQqK8NIIEK znctHvD5b7TSv>vTpPkP3T4$}&l$fy(q9J*ze66)VQfed~h}CY5QO1su9c2W9szc5S z8>Ga}m&)TvV?MnURXwHFdIfv%=(+}?WY-A8l3>{(3RCYP3&~2CyV1^(VaF0l<|+{gqeUOwWwX?9eqAD<0HJFp;5!~n z#r*a9ltG<4J!|UVi1$4@C0*=*S0iN#ct_t-0k4pL^CWxw@KP9(QnTj@zp%EfsF93$ zDWp`d+00YU#F87Yr+ci~C_NCzqC{JYtrklL1SK{o4ue%IGu<3t*RtkLo;$stxA45t zFuz_655o#cJz^?t*blwSzJ`Mq*3gkg1nwBLPkJc1Rr<&=4p5?qv_cdRp{TWtE zhn?eIrXYG|bbu&YlGC@z>5YJrbe;plmG?s=H|eXVW?t_}C9Ld}9Sx1xwRu`%Cs^6^ zJj`-`@N2cO1Wa~yog8sfBJ^T#yj}07plR>0d-QzGF`8}X7k#1Gw3M|_(NmajI;gt6Uiqj z-`NVS7Pn_^5PsU1I!!)$(_UtI>sjP2WVqJU&=+s-_m9fEAZqK9Qv6~2;UUbWB{2+u zBt?vnr&L5q*g@>}z2c|zKGhdzTDYHZ|7VvPa^!vxJ|GSzarKL%&BVw1-kZG*3-Na{ zbUi5FN^SWXP?L^*FFfH?nGf-N&aM~*OhRgG5(zWvuY{*CF@S*%Z*HrT9#W@97RaO^ zvKDC0$owCJ*?%+|DA6%`nvZyChbARZ@(*VU&;}H}6mxDysi|xqI8^zr1!ZW+0HFe) zSQ8q^h6UKO{gnG9{Gkj_T9*XAS`{Ic2o>Df zv6*P)AB?;<<{Y=^9Bl+K`iYi;5~4}^0I(>0U^8;mjyyWy z@rV&`94lBdt6%JA;i=U&Vc|F!G2FMNOhr1&YWh82RnmXNBPiSR>%)V}41X4hUR*7*KiZ}8HAW*VpsPDGvH$js885&VKu!8G z$7BGo5siX*>Fn+8h4Io=FMLHLxXg1O9K*@89^&`URwvk?RQmH~=7)_;@^?{WW%LLpQRI{T;G{ zmfKqlt{~hk=58^kW=HA-v!B+~vf-vmY)yY{c0y;ji#XePKPbHe$W7{rnI=Q*Y0JY6 zmy9<{suQabv0VUlIJhG+vL(4YY`;0m_O z7#pdCV!+Po4{TAFJ#9zJkoAx80NJa9T~cJcqPr+O`E=WTGg>h)JiPZ+4hLrkv;Dof 
z=WROsqFA#8N+4sobrfqCkB~=K7QkrO=<(Oq{C?xNj_-7;@x?C}#n~zn<@_Z@_M?-Qr^RW@5--~6InPM1(Q4QAEwRf(*i9|Xel*sl{Db+0 zyx;%l;e`1hB8pl1%E&?IT7LY^s-JQQs&nq?b6Z+2 zYZkXzz<0@mGlSvB*LKa!hv7Zwm?Pxq(5v%~k&d@3(xSyTChyA$qSaHfGjc_0Zq4>| z1+@zn!MYekQ;;h+6a*RXurUZpIY-U#&9byq(p34`u-WbCG+4ICv^iudiP33;?)wYY z!~3PZi2~on?qLrz&*D-=!7e6ELL_l@ye}kPy<# z92mp~008&s&S(*n#_A4w)&@(YC-$$EiKbK zO%%I;7*-z!l?pUKg=mi~@+BTSDTe+}zQ6)$Q1eAd{l1DiTM}cmUrIFtVTp8>+UcF1 zmFVh6@oCQLXc4hlZCOsHIapJyC~*Z00xQHWrKB3ND;MH=2SCBV=$oO@w=R8AgnJ6{ zpc@<`PYSB<@In&9p?_c(a6pr!yKRCYCFvp%BMBgWZ{$E}XV-(Fi)C`Em|KT-^rjx% z-I4ejtVZ~s=KKHtJoS@FXg@AyZNU&@_w*+e7}4I24rYri!i{=NhmkyzmTK-1MIZo< zm;M>KPbZP(?XMCP_UCj?AP)fq;AV?sp}PS!u$iXnmI+uv9YU-y_B(d z=J}C(x>xeDS_x>TZ?BMF(vTbAa*J)&f?i|@mk{6W=0B>$ zJ~Gz(PB&lRvkfslpmOvpq^%!ZmufNEo4>C!)*pMzHO|hOPD7V0ZJIa3-_+v2`M&iD zJk6{FQ`#MSQ|ZqSPDM|J;fu3{!Q6@%*Fhq1A3EI7X;seW3LCX&6%@tSA-+vo<(QZl zv}{NGPT{7n*M-d)#ZIe&)Cx*}<9u@R4bi>xB!9W2h}4~9GYRG1kwJ=1d1T-niy(_Y z9NX>Pk+LTdRW1IRxp*Rlm;sxrAb48Y`Ink(ums`r+HCL?#sMQ+0Fwyb*Ej1(7XRe|9uY}D|J#2`JnO3CKzbh@RB&(VVYkcK|JqCn#w z?Q>Ctlz`FtDQdQ`N!Ka>?1ltEkI?A6d)!hW z`6Tg^`9P0Z_q@r$@jN|lSbu<{Tslx8jP{zV&az!&>3UeC@#fQ)jA`jD3)mdsS6{{sC zAycrxsfcK-EBMpw811P>0|5EzLUR~>oQ^8#C#iT8R8KO(FQOxI#?nj8t67vtDYn;F zZ87HJqpzk(ySbcv-I@NcZZ#}p+8&xJ#or0+9^Hh`L2(VI)2<&>^cLp5eVXbjTFbcJ zw$|{^mc^`EEnXhR(pHdCV=^+Nb)0)sdWWO1W<8vL4(MfxCZZhP-#Smrk}WE^x>{U> zk(=tHl1YrMqCa~!db1uT#U=WFwU#z8!{SG{*~&<%=g|CGl(fq4YV0=eLxJyI)%x75 zQ10K+5%tw}qmTDQzOrwGr4xNHirgvG$`K?@o0?C^|J2T0zxc#!7Y|CLO; zFI`94K4?L^XO3$aqK)eLXo~xr)bow=s}sFHvfEwv%97%cS#T`gT<*Nf53E4Kg)Byx zjr3;QVf!`g;aBwa6_;+)7mf7I5$<8PT740_tA$TY_16!7H8Woy9C;v4T&h?eB(3{$ z)2dNE10hMf^E9w5?%$bJKfHyMBY^W zQ5X&4E-0--UV{nC%X?*7jNr-&eEwoq zg6Vm7wXAF3@!}ab_E^Qou2DHDLg%8`5=lu71tdqwuM#vmuWb1bm!DHeEW3wa%$6nn zQDtm#zldMYbUki(-MXWqOOeJ=U*c(Us~cVuaM>?SC1)9oLh`0Q*cEifydT4_&`iO; zC#jPNY-f|>dU5~Rf!gi0W@vM-YoH1@S|xJsyZ2GLy@dp#IxbTRBfK(MjDCG8&t{vN zTD7igqq{rVtrA1K7Nd6Lak6DI1|*j7p_ya}AS+*rK4nyL;RBe^*O$d3!S_ePZLq&)pX7i10UIiR z>2>-+tcGfJ5Qszc0f_cyVai+QIzPRRfZ6`Uns4?cC~-BNmpaQT4YSwsTolHrG_GXBn@@rp|ttg7@0>CZME)`9a|cb~^$C#n#h}+oZY< z!Qhz+4LgaTKRV-xX#qLap9AyQcAYrX-UAN)nThat?8Ec)lRj8wRmZ5sW=SS1wxBTEL8m51CivCk67cB+^21@2DSzDjaX_{1Ox9yE* zt>=g!@MI9-;x@Tn@BRJz*SU|Gg=GnXr!Fl$ez@F*{2ZgjEY{M~(Xp_wIGwFL zZuN$hU{21?!T}H{UQCSb!^I}0d}f{H`hO~8_St%I>hqZ-(w&nXB`xu7=k-x3%(XYu znb^Xt3mTPAKkjJHPH~JTTczn5Gh7YUp~aCpxQ@2uHKH&-zO62EA9zX?xFSIIAf*hy zdKh@MVJpUApn96~vT9kf^905t(Mr{_+DofH-JX65D!gxC)$Y@~19j-%vj9G@iCwYt z$eP4DpemBXYr0IT!-MDUMPF6rPr#00zUZK}PNoaNO)DbfE4Yx+KI)XG49`r7=RMCx=bC%Ke#Q7?RY_ldJvaenYP8 z{+caKqc=_$b(1WU1)X|=R_xkV#W6iPG{^mz9UtbJep1HA(V@ap^d@(+uQ3lE(O??1-Z}9_`AA87G;!zu z9A`FV3V-O{U_sH5$=+oh)bh`GL8bUeyVltcR{GkP$rwz`blh5aceyvIw6Yz0ZqsoF zJ{N8EH@#mw9gq5_h~@11{S`L)!0qU`B4ezSib=B5(`^;Oz(lw6{1Ec#id zw5sx`db+#ArrTu+w|}s_-TY@4+)K4~UJt&9>FFpWH{Nr@=Z?B^B$imD^kq9Q4RW_fOzSOZ2OoSU2N1o3G>?uzsr=xt(~}Q7{Y~V%Ev+ubGh{SZucBkMa9^w zjlFYFTl;fG$sPTw(I6GExz)jyICTO51O%Yhy-LoLwQsBT8hfX8nOp4TD^4OtW)O%i zIM3&O+PYe~5Es7}9FLo|=vDjLf|V4NyxOMG2{>a>GwkiOhq&PU&}NTLMR(LgrT=NF zbHC{H&2%ix)0x#c@^p3K6Mk&ls{Ll<*#H#!3lFQr=R2dVX(b|H0Cw_?mpqw!!P*9` zqaEdgnvK0ZSyYEi0@%E=zPP5bk%J*;OlwE*=NTUsHQg`IR`>Os&aILhhruvGuLU7G zbrzQuq^QvF16o1ts|VLa_0yTH8uKuCYu8`Z&|pGGAh>Hu0h^nq@sJ9hAr^i zY9hWdS_ZE%pgvAf$|HC3Hb+}vndZRmb7C4nI zR3H!Lz??D}kZebt*urHghPF|5cfdDD8F)G?sZIa*mL#kpq46$96lBK@0EG zr{9poxxI}}K+yWL%kPPdi<_H}%cgT?1_M|_6yoLK(eCi#r=~t!Yja2cBE!VQbgH8CjnRCy7yKZhoE^0tW?L+}x00{l0LZdd8NZJoPKuMnP=6XQo2Kn$*>UELTIv zg9;L@0)e-HoN*P}^G@=K`qLEx)ETM?5NNxTnl3kpA0R0Qg zLMp%pak_kYUb3X<5r@x~{Hb`Dr*!9R8r4w(paSF>k^`yq)Jm8>98&LGaTwG%nVn9~ z^~%S?-dx9hr)9&tn0&-wT!-6vwN&Btsxd=4; 
zRyj)t$ML`d|K!h#j*Gjtk(Jb7nVyu8WwOT-6qPBIL~lXU2t&|7ZjVCX!mSad6CFAd9jO zZWs{yTDw-eA66aemt=dQOr(2qz%>y=?drJtHs3naHxI#(1Av30EL1zrAbh?(tXo2( z$WRTiz8GF%clb&U#ZvsI9DgxYqTu#Gn)N9^U89yIKcD#I#X#muhAy*_^#s-Cf2PmD z*u;!4oQewR9SiPWSm*na=En@sl7DTcp71r;AsR;HV)N7I+p)T(X@TEd66XY zqJD!IWZvgDzOu`mfOoS??wLUjLGin7&Tmhdzh$7@wm6a{0InY#8toD3%0wAExi_1%9cJ|)?S$n^IkV2K#(4tN%WnwY;;FO2I z3^J2wqGnEMZ7A|2-%wLWakO8#WV$7bLxla6koCdLr0>R_URQ7C_0p7}_ckNJ7P{z`^b4yEk4Ts`Y zN>$4>n`+D^`XcZdwVJ=>PI`EFw6?WXYBmXf2q-Mv?+HOiBjmZ^M27|>V=<6=`}+F2 zyG6c#zh0_0Kc~A-gNT~+^jh_1v(K`jk&%%hA)r4tx`lEX9-f}^nS3Bk&DS$lRMZI3 zFs#F)qr1atN@iweGP2zAXS9EFGep_&YYwUB?uuZHU&d#D(Oeh8exT;&+8vUZ6z)gD9h?#m`)SW?}sn#*E!`!*Sz06Xz2+59ax5oqx6l`*AP5< ze7K4O2mzE%(pm1PrDAHy;gI&Ss52#P0TcW7*;+u*Fg?E0X(5D<4$g8nU5H7OynPDD zKWmyQ>_|#TNT}e6+sWSxIBoO3Rdt=#pNpR=L+=0nWZ`tgEyMa?XO@0l2mkh{3rHWFzrB#l)j;WubEy4@a( zQ(n_WMiR)cq#QJ*#lI7%xn(1LYv#dk(9kvutbR!kG*zpu$(DpQH%fNwv3!U z^E>DsZiqPv8Wud;k=c3}wFgdWsz?#UHJR(sPO9TnEB)y!SjC0&VrOl9PzCgdw^b?O z`4uVG38ow~J-8YTvd)z~uvpkFg%#OAyl^I|5QO(Naor2l5tA^l&$Zlw14^SIEnIZN z|0n2fLqP4N_polR1U@! z48|p|>9Z9i7b(Ui>;10oA!=Fp5WVKEHT1Uz*<#&Qa6(bH%a8f~b{Vb3elMNd-U!~w z>8y#8x^d)y%VW#$@^`rPJ-NdG?eO^W?ce1>QNU+NGt{eHWEL_|pTj2&Ow8{VQNLab zq78G&sJ~9*?8YEprrZjzU$|ojnKO9VQ^B3K`?6Zxt}kcOLd?+O^C>W1SFlm`zD8LQ z2eIOsrr2>khkKFyN^-oJp@{pP1$E&lsUhGA28aQ+SzKqir~S1Yz~tZSv`+-*g~~Fr^SNJ(M<7dsTvv{+rhk!_bmh!)T|fR8;1RM z-|+@#bFtC2?)}#G9fE5A2)y6N>j*u%eUeS%@Vp-7{9YhaTvGCMJuavbmI|E>g@Y3G zy-*GprE!;A`=6#3X+ZQDlauz@@8+#^o?QLlL%R8uzbGXv5eefT1b1EoUlLkM;!w1hb8_BXo7cKtk!bAx3JV1L6Pbw@qt@=)recaFX?L7rBixtMYXbe zx9KQSI^gm3u7n%;N>n2At zojRo7Fa3$lWWV#b((>|RZ@TFehU?*MI1%2w77S()TQWIg7U)#cgZ%?<-_dK19f@v~ z0lxHz@w47P#Qb!)JrWg%hXZJ#yaN)gIc-<PiuH~c2q z$uq*V>-lNrh8=WA{tO8|waR|5ZwPZIL!XK4= zf`dMVCb%g&v(Z`~u-9Um!E0h}DS=}m#gm#CpY-qkvOjO4{yYB~nRd4o41VSMGZ!&l zF()jf1rmw*X9?@fgl_x8*6yfdgz=xPiGa3eZ z8w(ok?R)>`!Cm6_5bxK1X3Bc}Y_u*odF4jxd^FtOU_sr1In0jaY2+oa6xXfYvY>$b zy0%Gfs^eukIcJnndD^|ZPQ3$@`LMD1?)vzfk3CJ4rRKByHgFK$!oU@Z~O zbrBp!f~mK)dhMQx%0A=bY+&_UbY0h0bke@qqb4Jkj@rYrHGa}ld|F=c^!!}V`Ajo+GEXNe2o?Ws zH%W(x*U4hJ!P2_zb}mW~%;gP!nVM1pAg3aP##dWc_p%cwpU9|_86WSkHZnu z0iXVeOlWDzsQ9d10MqduSBLy(XYKX1Uc%JNU$MxZuG!oyLGi3urSj9+ziu++?SClk z*Ge}+2$gYELHbK<^1X6vCp=d}tDozjfXM0&t+uNPyMsUsAydE@mS*>;0OP5E1)!39v| z(Zb9qcTvoJ-AjhRx{Kno?LM6N)^0Q9PgJ;0ZK=WVjcA3Hn1NTN4BBT#gc$-WXK)d! 
zO5@n|KX-mvnu(}=cJ*m*lGZ}|rF-*0z4@{gVLs->ouIz@Ij)xbi`D~2?I-oxqh=oe(`dxttcxMA>2vpP zgd=?4;1cvft7$m&d_UQ#qMMak%=}cUpGSMQio~Z>O1tCdaf1=h@ip!51$F)f`Dqj(9v1k z9?f3wjhb7WL>Ed`TDQ1(IiA>IW2RwXVG;4WIdli11_lO#!Qh652B0D`u3cSO86#=< z^`v%uYkPaXQpeiV6c-nF(F;CMI3^(hF1uLv-xo(>{S(*IAEv3;(4f+=A3@zvu+WZ@ zMs6U-`c@V})=HVtTKnn_&zLa1#j^v0arc<+l#PwW?(J@zhGuwGLHIX zc+f9C1X$!5+d(x0+3zEEk$GxqZl-a*&DHErDYQFRi&uVhiQ6Xn)1?9WZf)A#LR5Rs zn%uAX$b{(R&QV!$u?n4+AxPERP(DsV5-x(4lY;>THfvDmesuWuN~V8iKzY^vq#xs zOzN8HTHF?2?J>f6#`BGotx~KJD&+{b_Tg+mx^^yyx&HfWkQ?YQ6g_f|&2xtXL-SWf zP-$b7hDJf(?}xnvLkIQ*3it(y6P`8L?6ENku7uRsrCL(%`~I`%_9&41XUpTu;kVa~ zq{dGus+(j?hF$2kQ2xHV>Vbd4%F@%J1K@FRE*?K8o+=Xa=yqXVx&kviUOvI) z2|aV`pi+(_sCSQja`tjzSI)|qC(Qkdx*|y$jt@HfZR|pWI!WhfoonKj5x#4K3csOlbAKJYtDWM@ICWdH&kiLFu zYHA=dHu&88&3!jP?LD!;isDgdij)CiaY3T~(FLVi+n{*kZRU zrGrNQ@7?OnNz7MYPz~8o(rvY#XnnNP1Xe*uYdQ72sa-6`*^!BJU)a%|=wVbS#&J)% zJCwW2O0b|l%?e~s&itGoxXF6*CQ-KG5)h1VN!yT*ZJSnkrbRp~QC3cmE2`)z{aH7& z;*S(mGJ$p3l_s0vrZ@cUeXWUP+AvdmGBk&5oc#z`H@@UJ4;6g zC76TIuAKK&v0$Iwr0a08Yi>7Sm|VD)u79)y>%44j-m#-l)nnNEBzpexRacz>9Nw2< z8}t*r@NNmR5_-{9jLNPhVO7L#=P#bo*vf!b6=kq<4 zSFQ21VZ)s1;*_(y6RT!jh2tCGo5$^5M2DKI2?Iqaz3~-b;y~CpZF0scC{rXU2`=@W zKb@TUR>2NvlB&ho*^GPkayU}6A0Ut-*K1cEav5LjxO0YM_gJl0(@6hRnuY|Bn`q^C zTonFS8YlqTH+$YOEwOZ#?XH61pT}$kSDsio%a;0(y#56Pko}Kl^p14L=u=oP8sCM` z^mBYXM!wRb`pqC^R_o1_G_m)~}|n@r$UneX`>Z4OM2Wwt^uwtE+ZMdoGdIgd3gr2B`T7VlBa5rZo$spp4a1U5+wV1 z^4I+v>MzSFEpCgrrPmnn?LKHiox8vWVvk3gSRt#hF%wWGw~PnZEmXSGi_q zM9s@E>=lW_n&%0yCDDgA#d4g3FMS!#jm`UlACU}#-%D9jiOrBZI)xEw-s;-Ch9C=i zgXmOBP*eN9GV=3xS2&mBmeZfYPN)qY@xbAkx)VvG#-|U^cstBwSHFDcEej5J5`D@- zoNBnxDfkR7Y}C&q#LgQ!pCN6l*k4Z2O$b3u2qQ*3uy${-O|EPseS64%uQ>B4=+!~n zyp3lX-%I%Az+uVUf={ec#$)$--uV`d_!OPVrE`L!E7!=?((_<`xS&@kO$hm&b!$RJ z@ot12*|$%m6AO;i`X*&X`XnE@Z5K{rw_btaAH33k6xe1`aKb%2RUFMaS0;x#i;EW+ z5pZCCAnPiq4)AKOHW1_SfmU4ZJiIWwIjeX3cp}n2fuDxWMHYTKh%m{SV?6fFX68Ev4(E{)O88$KS|V*l#&_GFn>f#WwZM`HZ15sT%e^^ z4)o7;P5X@Ol{Su5d^$KTp><2&dV)thEhFHzh!nOR{iaSR1u|3(#1cst=?OByMDvNU zM&B}-*^!E~a*@u*oHuiE0&Bz1006S0PYVT`C`iRJB@>j)NN~uYoOjYZ<*zt}rG|y; ziQuw~G(O9j?{RnMP-kQwwrtqifb*g0HU!Ej>5lKAw(Q6#uo4|oA_@3TlsRL%dN3!= z4==B{cvSnG9$l-;Ob>MVbac};JlaPI`8@2D zc=iFDrF-Y5U9L@BDEHs^&rO27quq%VDzuqk2kaT`gaAMx&%_5-3Ds#VTzcOO@kDHUfqrgt2J*9yv{K z`DXIx8M|E_|IpR7l>3Jzb5`fMnp-r1o9Ku`l^?#2zt9U;#as3ZT-A^BjpM>s6Iqzj zBk7}&pGHH?h=_x8x^n3q25oy)PUn^f>|f7WS-riS7U;gBW$I}rM5E_2hvA)`cR~Af z*>-wD$!OW#Ejp<+-bVV~!)BjJUDZ{2rA z_~qQiezicswz9;%75vgXZ7|D{#$@Khf~?e97< z#cEyVHCjD=!)x=$;{3H@SH5R{c*J^HB;?(rbJN^8?DH~(*Vys#`_uz(AAB322+GY> z`c82DJqZ+Waph?AfWY+9RW5J$~er+$Y;W}NW6+Ndsb>(x|CglKgHO^av*NqEHW#R z`IS#>{BlF})PfSK9=M!%WeE8Y9Zn+ew_SwN#lCQ?t}8i#)hG{wnyh@yfVJUuHmTgAsROgIJ*$Z0Tc{(V+$`WKj>26yjgvJS#hp60qs*DoA+lvNaXjBxjhQ5frNxfb~<(63iVY|uV-yY=rND{4m8de>C z{q75vtJ$w|8dKF&CE(biH;1X4(rUSm<3>@-yc<32>X2*S-!Ig2PCyKfB*f^7FJH~j z=&ggx4URY??kZn*tf+fGAE}V^$4x#wT6taz<9E(&_GVR$ta}ZSIuZD>G=1Yo!gh8Q zf)?JGRT%FW3lPpXFRw#@PZ66zUz;gtz4MZ2OB32wLy@JY3dwnzVWe;Ow`ub1GkK1! 
zr2XI~W1Xz;Rfa?00CytIHx}db-^6lQYxSISHf+7#5O8&@&pFfa8XVbkGv^zQN8|JF zzp%KJUew=LUG($hnPLY<8R;fYJRDt1_Mb~Lky;-X|9+=9ZSSNV_iUbZ=?8szae@oebhzX(PTQH z-Sy?`kXB{cXO3nfH9-SRBEdyMto7>aX3=-z1%DbJW5Rw|!qKAo`WVbJ`aL z|FA%ZgNwe^x~mG|i_KAO*jW3uui1^09>h2@u7@JvI0BJ`cV)WI?%vnVMI_sO0Z(?!a~);|yT9VFORmX&(z&9zO7 zukfk@Ds5#&Bu3XwbfdDryx=>9lI(JX1Q(-p@RM-oNfkNwr#7RqiZY&Os&bGEBQa`A zLPLM|9Ec#uhEihf7-~z^Ax7b0^JxsQme49Op1bV-G;ej8hxt3HLg1}ix*3QYS0JZB zr8jE`SGdk!bsVM9er|_P`xconj&_mEEkvS+>WG=&(vanTEg?%d?MV$pYTs?9wz;rf z=5S;qf=0At^W7%u#O0RU zA)-yPak#YkW;B3H%>7!9E+;mVF&g?P$Du<3A-v+~oZ>vo53Ig^g&_KV?4px)ZGdH*dZ2P_S&tj-P(7h9U`np!Jcl^LNq0XWrH+WQ>Ja&-=k~K+!JxpNW^LCDZ(TlC16~Jyoz1J0pj)-1)KuB{7Y)aH7)aGb8t`Hc(YC2Eq;jhf#x1L(7iMVR z@^u>N$>&P?+!!kRooj&)?_hZ{d=1^vZI3r`N$45p%&gfiE5`qp_ZxG1Rh<;es=*1p zm9hvmUb#FzV(($Ru-Q7NKfOm1Zu=fW*5PgFdoj@x)i+?JN9UHc8C?0cPIgwvlP`2e zXt^`u-0_aubK9RjxWaY@XT(2|F!kK)p~YFC=wv(Li~+_0Jqb=@e7x&RZ=ZYLuRh$_uzEgk00q4xF`y0o z0QB{9)QMxQpWY zEf=1awNX3K-aK}@6@kud4!7Rs3~U~9Uk*2fQ7vw(Z@&n2K&f}j#lMRvYDeCC){%>q zAON+&Z`a_rCbm&f!)};7C)F|yQXBC?BRQSbJzB3X>;6MCsHXn#(tTYZeSx5H)VbMt zBpz2lyZx>KOH%~0Ocztz(mtHXzILE+@Y3IYH$4&Le`tg*oN>%@Unc47c9a>DxwIt+ zUied?3|CdLww)w~g12>cVnXUy!ohlb(%3K8#|D>jrIiVK+7kKYb=JbqEPEQ_FQ^WF z7`SSSvKCBPmF;O##JL#o6$LW~N7j(WiSvSlRh+;pW8->PhV+xm9wBeramF(Dj?N!g z91TJJV`u7F^yaGyr&2i;NxS>~*8DCv7WOx#3=UiP_~P8QAVB7CyIK-OebBObcVYP& z+f0Zd6igU(rB3pcsX2#b*k*$mAfrE&Xxb9TW5j-@_hzbYHQJ~#{ZbenNiSZryDtUP}c zSfWL#dVcEgesz9*P^wTye#tv|`267m&ENtva-uk_+dGt9SL62Mi=UAd!KKSkhfmRm zZnWsaXR!PoEhe-ZNGI^^2v@!ZJA?wA)}2%&hz~Fl2w$LDmS&$&-oY{eygp$*`qCaY zrI zp&uHNV^H+p{Neum{)(w^kUZ_)_ii?M@fmae6Dq!P{qQNRMi9-yQ~$gom)!&ZBOsL2ZuuOPxuZgV%Q-RoGYcykFiHIR&Q?2=ZX+3gsc zSiATdxBVesp|k*6Y=_f5APXe>)WzD;&gVf6Z-tD_f8SjBU=)stTrKY{auOAyl;(Ty z$<=juRo#vK;~Y*un5znK0F$DBSt?eF%jG0qs%I}=VDY#s3@e1?L`|x&xXu!{JLxY^ z#rXLiXfqyR}(%g@yIj`l`~fpe^}sixW?Eo7t|rQyh#?Oc2nM2$(J}GrF9s21-RRju%dJ9 z6;~RCJH%^0Zf;bROZLufUU&S!n62?vr%%0~nT9e7OnUEuPe^;Ob0H)2OmytuN9XBm z@u6d5rcXbZ@gk^|ZLC9U!F#@Y_SVP2eZZkDq3cLRWfqgAo=9P^goftmmJXuugXCF+ zI6p#t6e`5H%7U)w+;d6^j{@R+DLUmlJv!mIP-LvtdJFZD2kAjsg3wbwl0qiVO^Hw* za6mE=AB-{#t^3q|5@7z57h=5j0geEA4=Sp0obyz=fhrL+AEjR7Oo3AHu3iE1vMrY49x7N&b-(ZLy%^rJ5C@I~M zp->WvlkUqv&*ly=fR7=ar%|OkkGI*cTp_aI5W$DrKAospyiVbr+fKFdv3FPYRoFsW zt{ab(b6FECi2~PaUT$Z}0D^A=%jw^W`;Mi!Z%qmRZHm0nUC|4A|A7N+T-Jkwq3YlW zS{U)r%_zdOFSo~#0h=p}S80v@Nbd>Wr-XVo`h^wYDFa3G&a zO}OO$hpVrSimLD0K7fRRponycC|%M$C?$;`-5@!1cZh&AD4=wAcMjb{$I#u~G2}P5 zx6k{$-^(Af7OZs+XU=c;zOKEUHE6A+K1b(p?Y9?c-j5@Hm*Xg$VU=OD;;!A>k z*~U!SxZsbP%bvVNv3$zq5C~g@=gSmgQ+5Z3?-eQJ${z1mf;jDWch?5&llka%k7vi9 zvulNCe@52hP!3$+C#p1CN#9glyjd_l_C|kV@Gy6*iG!=eeop7U07+Eoesz4s)!=m~ z0CKxsno4@-;pe%OSr^{F?wOz?6TR)Li+j4@y^)t^D1Lj@-YbCJLcuy0rt4(Y4K_JR z;GSKSxuF#>vb>Meh+W)*k1MZc6b^utFu#+fL$4^v{PiT$$L)+$?n-vr z+a+&Ab26&{;M(_r{$6LNLMf{5m~`aEL%SJ^KymcR>jLCqGWSQY+_^0t`NrGz9 z6+l>hXOU#3qK#BMl$d_D`Eb1CwtlHK!i7KN(=?u2|8L6OF-y=sH}dyzPQ)9xIPr8U z+cj^gAv)KJcHZ~5CV${B2uHHHX4x)J*$mZPbGq=e%NRDC`=I-2FsMXD^w*ZpeqQ-g z27DYdZD+3|zl_GZx`z9PQwvJ;@3MK`cI%31L4TChxyPX7JQo8K3X36c~1V&rTt#_Z2vlU z1rYig9$`QkN8o%55yVkG= zgiQ4qqfA8ISj*z{k(Z9$2j*_O)Cm6O_Rwo+*$ASSw0jb<)eY_s;!iibHkFZ*XW3PE zC>NpvHKf%^IY!|k2W5Glni*SvUDd*rpk80PSOC_A7~tzVA$Rh-J#WsICd1E4})p!&U$PgS6IYHAf-H4Sus zUoWom0gq;lOXAJ$+C&!Gl-kTmgLMKsJ!_9vl+&Axk*B&W`Kls}<>q0Jt@0)5{5CIB060iOY2lo}#+q_b^4J|14 zlm6CpxF-Z@`yz5Xp-2W}J1E)2XRrw-%HKlzy}{gV%67N}!94)NTK!@1WmJ*B2ard4Jxu_W+ktHu>-f=3M%O1U`(V_FzNIg$c zm|T_lYjFQ~mNhF%Cp)%P2W$mn*I4X>*wJyeyt<5^F)t@)Cb}9kKaI#LF&gs1ebKcI z7d;(jkI{U5vm3?p#xD2V(lj^@rv3HnW~n{x%aH*TBQXdXz9swC4n0_z7%jejEagF9 zd0|rmy9<6AmYz4nOdNT0mnmyP4=kN<7r<^PZWqhVW`_=xs>ert{qi-KKBwogFxiB7 
z9^T&(F#_;uM-uVDdzU$vd$>sSqrdPRI|4maTre1aB8IbwhPU2L-5*JW=?&Rtbd2id zk-$d_Gn#LWP2#g2#oZ$?6)54^TCAdHM!jVk8cNG5X1u&~-TSnPWFjG+j%md?w{#6C zLAjtQ1XK375Um9u10Xt>LMc#mpsC~GxR*Wqs;&W3<7;)OjAWq8LcQ)KIdQXxRVnp; zJA^>W4WG047lSg>VO%g8>e7KJ60Pe6u5>Qm`V6d;$uq8TrlrP#6+POA+3^N`TUy56 zv>*{4J);cY1f&-yH+hL#js_q~4mnp~;A>0;Ba0=ZRf+L zcd9(5q_H4_6pJ$EgVaKc$WtE`nsb{pfC5;TvwtQGn^bx?i8e%VFZW{2Z@blLJEYFq zB8udHn^uS*#ZJ|<2U=K8zV*uLruVlIkx#oL>9B69K_MfNDc*Fi(YzQ4W%$HTI~QK{UCDfw46vCz!;a z(E|7l`YLedSGvz_)B$EFh06)0_?OP86!CT{k2y)F`Eu-kc^Sci3 zjSC^6DjIzUi?%ox{7iIN(j|%mavl%U;mp!jB+bdfg4!&T66!!dqd zPBOn9FoSjGP!Wpa(dmUJ0Tx-`c<>Jd-aW84+4{ietXGBl+u29!@O^M3ig8PV_TcH$ zqo=@<>k_-3csHpmV7Rv=sO zEb@Q7^bb8rK_fjyuM=??M2{emF8`7=?vzh^mm$upx-m*@-0V4`r8eIMwGiGJHit3$H3;i*`46#@+jU_r`^>nBCGfYzZLI4A zeyze`$0;5ft+}e>f${88kl=gm_anI~R<`bLIYmwDZ zX8Gi_(5?VL97x;Gra(Ot&hd8rB&@{j_4pYQ2jFgXoS%cFwKU;GM);`5SJtzBj-#bo zdwX*$vb=fM)=C+jXKtnHFA^$p*T&ASyrN=vSLe@J&OMDcNG*u}n)pFp^9Afay&ah?j5?=)xzNWzd9N@8>npj7P>U!{ZSg%FiHO7?~+eZ@spnPa|d6# z@plJq(PYi#;&XAHJaOPSE-WMY8vd>uk^9yL$*B&hs1_l2cNM1`XNTdtBq<7O$WRb} znMm_2=&abphIM$_9-VD8JL%#F2ys)jL1@5ac%(bibE(}|!X{R-ULT$UR(9`wgDF^< z$BTJKHA#8sgfm$N^-ToS37{k!KZ*bDN(>{#L)BZ9OiU^A7zrT0zX% zet=U0rkG^pfF=3@*@=u*7J5CN7F22=j5@-;fT88hE8-p$j+S!GX2rj{g$&r(*r=82 z`DSFS=A?M#f95hFEv>0R(C-d7G}Y98XJx(hDK9U_Bw+53=iD97Q#wCC7u1+TpyuQf zc}AwDJXYR3!=RRpcR=9ANz_p#B)XTDmR``v%`GWl__q1?q5(#|G?=` z)jl*aD6UHQnN}#%dyyXfkE}hg?w<(Hx{HaIkzxlYHPC_e))^xe=S*9?`sC@(~R9%hK2icgMhELXM2YWgx>Gn#)^~wl$8-}cM1mhzRYMSZ^okQ z?;3_G(L+23>)$oSHituSKBeS~gSBVFi&&&KcO5=c)!q+48j`plW1c8=;QiKg1&$EM zJzU~f?OS7pq7R#>!LvE2_)>F$vvIa+8Fdh%S5D_oh;xQf!cgD7MGYt>C}@gK%8BQ@ z_P*S8+Jx$@atrn+*oRfT?}VS-=1|MI7{*@+SBaPWNR}UBIsD{eZ9*#RYMy9tTevM> zSRD)<557YWCuA}!G}IVF*}aKyJ3IE8`PsZD@CAt81o3m3Li=x2j5i;+#f&w$%#&b_ z8#6bI5A^1xIN#&J8FEwZCy4DbHGFqh$x*%`IV?9*_Xb)?(=Dx^ltekv2etZF0YI0{ zWB-I!MEt}b*G!O#h>*{XFvnO|(aRo>ZP6+>A&D2WYf!TPtK$j9W**wBf5v@GhD;?6_P6n9%qcS8oDpSf!v9D+2X zD&3(=1ozg1_L?5dNz6rlSxNHEEgxq`ok8&J+LmbiDe*@Fms_|I9UetWTVF9YXVy~g z6_BwUs&bm^cpL_D18dZ~$b0BMvBf4c>$Kk%$~{~y6LV(J-P~->MKMUQXtVIxSC3d6 zMdscUYir76Ft2a^zRfMG(BvM&lZG@|Ht)FA*24xU#|99NbMCp7BH$oU3b^19KKh1C zoJxF!*@F&b0+K!~&(H{c>6gGu>)}imD8@jAulUk4)=gN+HQf^L(px>+7W6MIDhQG? z%Oy<DzNRboP=yOb@ebZ{)=1qsgaQpR=b*-+FT*FY1IrYYi_{h!i(}w3R?1F zWzLcApUTa>FJV|Ak5Cdj&#+woa?ALZ34qr9s#$na5sW1D^oLSO@k)`$b*rQ1{ZR5J z{G2r~Cz?34QV2=_5a!x@!?z|URUlWV?YOea+i{!)puQptCZm_?B(X(STIW*??iA(v zB=lo!F^u(fQrKwboh1+IS?cFgW3%42D`0y2pO+8L>m{2GlW}z`H{Gh7jn`8dn$?9r zFtCDy_XHPBl7G=#Z~fq{(s17clRFm zPoRek%#~+SsOSv_k35$)#tO`*W_LVvN0&GLjR!Fg{aw1p^Z?Ta3iGW+MqtZwv|D z6(=b1>v;9cA4=hac)!e@yvlCk+wIX^#K+RC8?8UJ;NA4T8b=Jj5;1A+&`)i=`e5hf zI*IdoCzko51++q2Qxur0QV7cmc_R?K#` znXJ@pz0yDAq-lD=v=907h{Od4Rx-Tji>A1;)4|k51g_CQYgK(4i}!SHyueLzE4#$w zPpU}}1jGN_f8bpepJQuTrH|RdG7hSGu$ru_GAw-Y!b=s=5nx<&JCcs*KQbd$rV3e( z@Z=)}>|HMlph}tB zXZ%_BWI8W{-^@ipCA;QA%h&bitH~Iw(VI!|z39`2?7Et0Cqb=whgzse_AVkXtrKy$ z9kF?Ogk1Xks@O>Cw&<|Uy5$2x&17|Py0nMTP45nDNpsO|Z|bH0^?H%BJqS$h*jR)*WjrcapnnfS$v zv1lZXf(g0btdvYlMfv&pblc@QIXNLAAqupWrKQ8$4mt8EFjJ1#QKsX$pAcKr(-c^- ze#XYagjlMYn;(3TP{z$2EiH?(vetuM&^$tJE!A%?EG&#SW@cnel7rn;nepB2M0g_y?j%&!kpfhe!;-7 zCzZ=~E?Tm-S`O)-QMwuzm!kZ%P~Bwx3g7P)27{phlMNO*@018uER>cly(IUpE0k2P zIKQ}n1!Bh~yN!p>*%LQp;~J2kxdm0`(l^wUHmpqhS*VE9p-kNR5t$}pqnlJRzvciW z&^Gy_-HvCcF(hXUe6ZvZ41>WS#k2e5(g-~3MQ&lVzGSN-JYc74GOjol)6h? 
z)_2F#VHAw+=1T1YK}s}h%*K{)KhcuN+>&QaaIDM_rKSMMrmlyWYvk=dM*tS z6A-KKe^V?zc0w#nd`3b(2U7$x+|A8x_bh~fMF_t3(?h8F{L2Ik#svaB?BIuruSQWx zbQm1>$FbYn+4T<$_wl+=8#3v-o-rwDD5)TZgiFkT=jE;@9x3wWIQ&dtyOr0?vh>t-CC|`x>>&MvG zLN-f2Dg|U*7HVT1rKPNOTI6J8On6TaQmunfCrwQ-7_6zJL>2Xs2NC48mmwm|yu5dW zSXk{xE{z!y5y71+>J`SEs5ackXJ?fa75pwouQ1CdwKl)sm17+d$CH2~T?N)vd>qs8owE&=){y32eaPQ?)N>%6{x zkV~NLeU#E?#S#NXcUX;m#orcdSAgHaEzZl71zovlTPUw$ZV zH9a^mDvIgF!gjK(^CI+2Dy(E0W5*2dn4FhYRZ@}|8Tw;@)?)QsDOoH8;OX}#ni02` z=wvx14JhJ(%IvMq;j^VftHSSeX={PYo|e#F?H!lM1iqK>&a5BSOY@$oTnSax|i{P7N>dsv~r zn3x!V*s(Zbx_0b>m>hthlOcv)04)XZg)>H?V`6yu z`K7ZIX=rGKQgoG++F3*W{QNR93|(FA?Y^(9tYqJlV4;zh>^sc%_U4tAMjEN7mSvV2 ze-9MomSA!sA>1<>U?+yFAQyifvJt)DdYUCEMdi~~UOppyTD`E%a3Y0_E>CKf5SOI% zd44;PJ3P)%I2Tbq7Y`ThXIXW=bZ{}0eNS$E)j67Mm6dD1p+uPazVCpSL{7QYWu`_1cG%T^w3 z2T2hjs5ssuD{|$F`z*(O|ED}1shN5;6(6YR52UKD5#9)06Frmgsz%J5ehZfpBO9W0 zXzc-HyTX$uT9SsjKO6|xTtw!98aC1d9s^r1dCjFBWO|rJ-T~K-Y|=~IMXiz!Z|YJ0 z`l^O_xt!DEJ6Z zO;TD~m9Bp!-SKBJWs!;Mv)D-;Dn_EQB1oZP~ zv#5?={ym76jW*2^Etg$!4ZT$6nd-G|OHT;(Hy||WS3_19omn7k9~59N-R zu3=HASls&ZwWo1xue_74CQ{9l;0==JV&zWupcJ$RqTB@9g}eeQ650Bc`=lR!S|lDk>_Dj^|q=84;ws zqmGw|5aRlL_g8W1v&Xs0pn08`$ywPvriOqxP@P>A!!7#;1NbY ziFqD6HPRcOhvEZMB;?R&^5>-(W;KwYd93u*hh~YS>S;{fV_SoqC~RYyn33t(`m|Ef zUgtBXaRLI^R_B1V!@u>YwZV3~!nyf<9%saqScfog?~%g04NV`bg2elng{p7Y&8Ht6JR-y)Oi zQIJ#r)4!g3d2({HfS8|xuvxu7X(U8N3$+`Z`hUNRii*hFw@x;Lp-84D3j-dSpu|-q|%mq)L8F zd?o>eReum}a+uG_=qAzAa;i39o?Z&US^Os)tzlY4q| z33V+pFF+??BgJ)U!h zTUO(wqkTE9oA$4NRK*#4qbuP*EXRM3qPYQH9UQ6Z`7w~Ag!FVxz>R5*uKu``9D^rr^wLKGE0Hh{cOt#Ge{P4qI)fp z<=>$(p=QmxbyNS(co&RqcgPjF7}0;F=LW4HvbuJnjiJY-{mjv#?wzdyh zibPmwn3$Lh3cgf_IgXOPoweXIGLFAne zL}Gtpkbke3%lcpZZ&?*YhH`z69pr_khY$m=+pFw^07C zZWtZo(y;e$Lqjh+si=CR^pUm^oo`*~PaI2P|K!a7ejIrGFFKiSTJ4{o68HE2p652C ztGsfLzIs~_v$$n&78LvhyM(*;`E=8W{dHMj-_^)+tw zn~Tf}klJhFP!NEn-=(yhlk9?@oQ4z}Mz8%DeM&j`0qK8OlYiTOYZSu&KlHy?JC_|= zWm~yXaUR3g_wc7ZIEtN7O#hRUP*cd9WE%E5mdcjs*^U*qvZa{+4A2x;S4WiM8@@mB zaH8E#r?z@rb$=lITI`jA- zai!3BHG@M`68iTRQ@Ql1uVrTxo%DD225-VugsuNB42;Epqr+sn7yBo)5QuW|pp57)(}3|*u%m-b2qb!bj7h77FFz)P z$mwnh{<&H}=)Y(Auk`%AX%G8=(5qT|X&_XEc|Z=5z`!tsHgIfdQIyuN!q2h0w`l-1 zNmbo2$$7ikLr22#&vPf6{@;5E)bXSo2+h0IU*Am6JR#K1qXuQrZd<_M3`X%?R{!o) zq+Hu>+2-gR#Wjo5C|aH6cX#2vK@F*so;-~PUPXT=#Lz;%<#ioQYIu^Il<=!W9+NyF zQbCx4@1J*Z@+jj!eY5|_gJdQFmm?`Bf305z-?r#RnrE*oThefAAI((NY-RBfPVYk1 zYj}NQ+3eNYC~wu(mF2_LVFbD25pe_4zu%vO8ZG!Xdg?Y<0uD#*9708_=RSApk*8a3 zc_%>r3ODZnQ5hHmXlXw`{;KKDX^+&%0QSqLGbE{O&y|`1+mF@+B#a7cSnH$*BPkJF_AP`q}C6@-|5WrF<;v z82aAzdKhd#FpL1L%DjH}7k75p!NOG0AG<;sff5n3fk*qwrnCwfKywrH8U32x=P{J! 
z&xUV6LgHn}{fGD0@u=^A$e;gyLjl{$y9KNZDM*vKk=%0>;rZS6d#ahEmC58q2XPc8 z<`VwABu<-g#*%*Pijdzs;G?a|wgOyuQ7|u?O{}t!kdO(?m0Q!hfZdiz?^extg+Y_uiSaP+Lsf9f-ac z>wAgaCb{Tc*RTIj%ANkAHs2DH_<2>iQGX)~e#bOGc)PB;d!rnbr-$nl*!cXX&K*(| z+R}>Y)1@rRB>X$OTDxUw5q=aK+e*tl*uMdFw`jqHO=y_U5E51;A6bhTXQ4h5O6-L1 za)^shaan@YKPRL3;{WPUGNX9{Ihk7>9>q0R4qNT?j0$i>N=m=#Z@I#ygQ+tLhja1X zSba`dSEF${25VHF4@^iao*VEt__SPH?fw;0TC`<``OsLplibsWo)2(%Dwie zlL_7Hex>gCrmpYFIr0$v@WPbx%OT(5V%pwT{zVA;MAv@y)}}=7$U?l4@j?e7eXZ0ZcYS*Hp!#~WPE%LR004t@^gyB1Fw&N z^tR{4_y5K=f7CycCL~>%z**bEz`|@DcchoXVD!!<&Q_`&7tM%w)3DZdo(yM>c6?GT zCjVm=ENHy*gux^|hk!@xx~N;N*IT%q}|gyfcyvex;pBT6fxw1VZWUo7z&3m;wX$^nEEELT=4@AE!@O{D*N zmmz97Q^PD4C88g^yPXHJ+C`~@v~#@%QpW)c1oUXgf-MhX~Q@_q8GJVi?Ui(%% zmg&U^Kw1V{t7(noulU3!;j8;kUtW9j%=0_lB_sB{`DOfE*G?mtLUp#2QLifWeDH6g z^48U!yAE+J3URh}(?9x<^3L0O!;MjW7#4;C6hyw$6f(l1Q|*et8hPMb@ZUb!oP;4y!bp-H;WHZ?wqEFC&rrM@9{vpbGzO zk72xvNdMGpD6SNa2(7$Hb!oqF7{y#B#Ac?{+K|aH&G6m01cm9majj~Q-EQN55HJEJ}h^CV1`^CjwX6H3-EmUTnB;rETFKiTRzUcgn2kIT3yVBumkz}0c82Ul2 zChh#;shQtHm~tPT;v0w=~?dO*Z9XdN*LKp~n zTifnZ!hadPjp~$Koa~is6^<;X0~-L>gj9-?e94-VVH*CZ1<}0Xw0#+Cl(|rAR+;>d zl&(p>G2KJ^>+UgJpCF3aVF8L;QQZ=+QcDnE;cvG&+kH|zx)Mk>4`hUzvS+@7eV^Kt zVqZ=WZa^P5n%tf4GXe&_-+pS(G=clCzdb58aQ|!k1n}M#nrLN*9o5ysha3WMgi{=h z$L4XUpDxt7M~H<@&d%_Dpki33lS`T6FdE!DmTn0CbuN^Yi#>A-2@*MF9E@t?r+;>HN;oKw9Z%M|jRxtpi zVyZ=puP+VEACfJBJB-g}Z{s zSe#ica#RSi78@G3&kmmQKCy|kDM^OE;h@>cNc+Sq0Fb`fq3b5VQAoZie8|IUn!XEe za3tX>m#xtArl5!_-8#v?=YKJc_WfS9inP>J(_7a|u7hFmF!&|%aL^sie!|PxiaUsE z3GJ@7H`OS=`(->gYh{h~3yCH!(52H_9SWG)gRa-rSq%EhfQPyLHYJWJxy8%>-1F#nWaG53j5Ny^=g+SM_xv{U{T|H04%fsE11aH;2$=`TCV8ozduW;z81hmZVpR>y^2BD&<-3vu4BT3tL`a{C z^4;Kr?%oN0Q;k+M7HW%_te#w}hBfJWIuF&-Hw?Exav1z_C{imfcXu+H8~8P!mUXoD z>Etk~9Jy!>4v|6rewf+~SSy z{yrK(PK}t~0OXEq0D*Gdmxt5AMvmR-KY(0L4FINM5OxvuZJm<``$J&%!UfL!*5|E} z>aY}uTx^V^eBO&0K39%&uwmG@+C~YAr@4k62~Ab-3399QOsZcQ?9f7wMm|{&+b>#u z`F;VZc8>SkoVIYR6zfeqj>`E|7M+WVyWN5bM}C6P>5Bt)y;8^b=3S6B{HCW#mx+E7 zlke_t3@Ua=#ggv_MqL-bDo=j;x8321%~l8}J{JC)GD{WyDQVEHcU{ZDn#h zCymb70z9^3k-bb>*4=>;QViI;=|2SB9TwwK^D+VhtZvxZ!f=Ggm>lMW8Medcmp2gL z<*O>pt<^UXAw%J`u$JBe@6CheYP)Dgfs=@@I6f%NmmRhRfoeG^A#hF+QaoaT2Ty*+ znMRHVgIY)fj-Wt6ogc^kRa~?HWEf)hQy52Z)Fn2WQ+6zUvZ1+|u&4RgGskz;-=kdI zT&ir4>5)?h2A@%L(-~ODA}`JIa$@eDt+4634>-@hCVA+Bx|O+i_j-C>b}9mYBB_4_ z>}wO|f0*h9_4%4RnMvO_ECI9Ttbw6lz|9^@G~-Z)U{C zkasj^QKH4nZ%Pk*tNBY}VCUnbK1HVzlK2*W8;2XOEI6ID)K1F_QtnHQb;T;MdE7ai zYdF5eAu)vU*yyF?QYM^ly%bHtSY$#@CD0`-H*4)dLJ{%sis+A|DdvsG0ET%SE|Z2w zk5y!@BJyIi2ZiqsgMUJ}pOUi`z&riTo}@v2cs=9sbT!U3u`y3im-}D??==rWmHPrm za}7l~mwfOcTQVTYa27FpWc*!2PGO&nNsA7x;Pt>MyZCj9wkJ!9uiVhZ?^=C zbYo$`oNA~1Y&JB$t2O_o@x5>TH`%cVA@`7nVGT^oH>Y$iJn=HBjJxLvyu`nVI-io}vp$ND+&x3Wom&FnG7i3KbDUjrLsCk7ur`dPoeM z$?C)=59AAe;CHrD#$N;T)MUTt+QQizWO-?avS0yrr@Pk*@ORw#Dc1;T8d$*5N%pWD zL#)UFFi@A;c{H;%&|!3@QY;r8{Bh=L^VagyVRyf%z4+_b)4|StRQzu*Jj(D8c@Mtj z3>L-dZRo4%>j}&J3Q*=B=V0XBSTrQZXO!EH9NVUo&_zs@~3^XH8+rUj&|`8ee)<4 z_W2RF)s%CZoih9bLr|&QOOO?5p`ZtZ(5y|6t7X6iHDZVW;Cl-G{&~%TPY~cR#jtdA z2(|wBz=}T6>)9Y*faRMezzn*S^QxLXtvh3|t)WJ%wsjn%%xapv-!8{Air5>L8q|`b z7_QsIY$)C@F4tO}-klO$@z%xKF9s4ub|(&=t<}mQ_g6ZitRHe9e8KI0-*OQ%wspfa>QV@iag`yDapb~ z9pFrW0PMaw?MnM@Szs;2^nGEHx8~sHD+=z#fsg};Hi51mVtJ-T_2hud+^PhzgJ2Kd z*VZxh0(vQ-Q^6y^SGsxy_vA;bpFWN8=QfZp6hJIcyAL!Cldf@=54D>=I-S`&FwAi> zUHH}=ji;kk=Qef_+{McDHDF+yY;*+L8Sry+PL{{U;9;hL4Iki%(r%~;z#$tQ?bhy( z{?bLkQ@{U9vwTJ(_H?{XQZ-n-+V1>p;&uD&Smx37^ge$BGbzZfWrG46#BHjE?%4M0 z<^|?1?o-`p8U|SV$b<$~9kI>svuCeWNI^e;45}e5ZO%`{&6$|l>rf`*=={jCp;J7TTBmr zP_}WOT*<9)Z;>$W+S~ayZc$3lCne#Mv4kupse*1hU8)bef`iQK++llG$*Tq4UYFUC zh8+2g{r$f~5P2y-fwIDLd+M_4AGzvTX%^F2W0=gX&knG%zWP_Sj68d=0BwaO_=P%U 
zLa)sFPD&1^;ngQ#>;0`Bbz zEa^s~ssKAV6y(CzR7PS-*VVbGsrVyGBi*^A{)4W{>q#!#^9xJGcpciw)BPk*fj2vi zS1s`yos62IJxbv`xMotXlr+qOG%7 zN!+1&(X+0{w2YPJz;5olncxhFM!;4A6G$A+vbWZqd)ypP540$mB&pR>cZn}?nzvmQ zBe(|@mlVx*eL(lw@259b+k$*D$*cJ&KF&~x-IDX#GpGdM`WT*z0 zT1Sx3n?=HMKHG3BS`~DpcG-zeC-pnca&oRa92z7U17oGDmAl)#<7?rjVYZu}_Nc8S zg&D5Uo5Sj&2Q)DMy9)<7l9h*3} zhU!xN$iICBo-mwanUzN>Rl1unIO_+nbgRfANOzTA9 zCbb3m7(^7rw7; ztJ(ip1N)wOe7FBT?8aW*=M(fWRstllbvYQyJEoOsl74BaqUO9s$mKfIPz`Q^ugR^` zwZqSdKu3MWVg-{0f*%%%&QG5B8mtV0T^*!R|Lt4!ya@ZR?v8)0s;W|BR4JHXgxZmI z+eSb8K2Ffiiq(!tgUi(VHz&Do{#iSC(yynGJ$*S4SI$d0)^t(t%*!C8%sem|GAFuV zDah|&H{La`ajNMSkf#Tdygo+K@Akb((?htx%{YoXt$V$pcxw0jSthU3+3-G>b*cQH z318)qpM75=FqW%5QfR&Lb=bKB-(ILHiF7(iOx}>85~8(4$%D*Jw%ZfF!#~~_^gT2I zEyAhwLhe1PK9-&JUvu^KKV9O`+j@WW_Q`X>q=Yybw>GkKa)t_!@Fe@y)P!(d27eAg z$5mk;hO(EK1kJj@AE6@Vx8kYiO)7}(Eb%(yrz$C&-y&?jv4UH!KU7|xTl|v<;c!yIr4Yh)9*+K zU@uvsD3Ge~u|hd|bBVlo;;)kl>d@iw`D z=ShLpsg5aAy`x@N$?{92u}>0>7DZ}kKFk)wnZ7F|pxe@w!xK6u=IX0Ec^b$t_->{L zDk_SfN+|3yU0reLpLl$avzT-#!lQqSX_a_ha{-y^3ETOUGRac&IhNL+x zE{@CgjDWbN$978)V)4^@362#u?vjp9Q0}C(672F*@r_$|xB=&t6L~n}6H(bjOP_2^ z9fGVBUQSTV`PQ7#J!bo4nI@V=RP1jC@P>`n-aabV{zkaTfvDvTqw3+L+0u9^cN5Fq z!TJ_fQ5=z?tpbwg=KikcJCc`m??PZjp!AwXd2?2-Mn6WjTLnlDolJg0E!|IZGnV!G7sDnv3{951{Y9h z+qwR$ml$S16^@V4Rd{GU>^ATB8Y0?@zejyfn zdp}iP(JUP0a{i5yy^&ncQ3Pa8WK<1u37*J{TZ^1S2h=Ib$uSAmjry32Xm5{p+b&x zJlYOUW^kXXgGrc96t>(s!!HTq&;CXb4}ka4?A#s0ziSPRgb8V&6^+6N^M z4#QTipSf1cw6KwNBfg{PzQzL-%>0==lgRd> znFKiL1u=yg$L*Iwz9A3JL~w_G8cpTWGEo-4rq;2!QL#H{DGk3@`Cibjg!XSx`AvXJULMU< zXqZ+qRR}E}Ind|B+=Go-5Q{Hr%dd!64u>NXw;kyt-dLO)YXHnsn1?QtqW-Hpq1AA8BK&jiE$@3+Pa@9f(e9!tj21GJH` z+kUgxaM!gi6VM8+kWmo+5@zRmM4|P4o%Q;sgr)})*o+I6W7qwV!9jjY9%r2-(Kb;8 zAN{P_^t~VWVzP`;H;LG>@KvGr$Jxaa5*7mEPHhO6gJQ%h`TK2mw}xVfC+c>0$=kSx z<0!uvzYsz(#k}m-D!+50Hl&H5O607*WD?C|4PbW`cSL0XS5gf3w6(-8Ce}}wHh+rE ze)EF!R}z0e@pEKg7+&A-gy&b@;hn%lZ6vn(g^EDx>k3YW5J7LV6x$v;9Dv5o#%@g^ z60zvp5!Uy_4zX5ysl~QS@))!HbSjWI`_j(odD@a3X915|oy)WhY(G22g;icW{AZSS zY@NNmOJYmkSo5y5cB2L7+o~W#SO`R7V0Y(-{b90a17(E=vsZGbhE}XE^IJMjRN%*? 
z)~@6A*t_O@FA9PI!&D_RbpSVQX9!fid6Uo5p)rl~0#E?EVmS<#VMRYALuq}P05qd+ z!_#JrBaD+#)@_d$Mgv@uMW{ZB_Qym4QGEj51xr=QLi>r&@;C%V=%eSQG32Am0oVhG(y<0xOj)PPL<2c z_Fc}-*{6lFN*Bqt39!4E@LrtSeELMYju(?zuT-4B(R@yXN1E$#)tIloT8FE-p@Ypw48QT5hgQFT$@ z@EKC+kd_doyPE+;RJyxUy1NDxkVa|gmhMjJ?(XjH9KPYcpZocqcmHv&GZ)O9eb!#R z_FlhEIN;VE&#_N%^N|#fUhSmlwWT8@q1@!RJgq8Ra7M&445vCJg8?-N9Wu+C}!t*CQ>hH;hBr)w1zmA{GSpf2&7^k%^MTbC zgB$P2hRY5jW2h9klC-qY%&add31|o;8^ZU;*?QQ(XBAb*+|*PlUs@c5g;WjA1dAz7 zkF5CCi5JG2>#H6v1d009_Jfa;WJ}Ims(LJ~byLJOlm=QS+dqC)dspdlxsjQc^_`8p zXQ-b!+I;0M>Aw9_n+V+?^EXo6MB8iMfX$65ez6o6VHo~m({Sw@(j=xm+aleXmx@hQ z1mTC^j9xVt&YTGrOC6p#{;ld2$?uS&4(Vag=RR5m)YqCfs^143A}cSd12ujpu>g_L zOp5143s%Q2jTOm+{Kj91mMzuC9bN2E9Yd=M3^#FxM*B2c>5L@e6W?&T6nuV{uk34H zNiwsf>}WWy z+MmnE<-AU=vcGS0gNJFeL12XRl9?**OL4BQ}RQjUJ zNd!@fV~!VN1h7$8agc6C!11+VIwBg`8Q;+qyo>T>fS~Dth_EoF$_jbzK)JrJMG$Fy z(N;?GxsKs^(+IH8qv06k08hD=r9VcfT5Jb*8J!$cmWfZuMgXH6oTbOt+?-)V+-{55 zAdXJU0&|1jfjqMdO*j;a?7gnHpybiU{PO{}(Wti|3|`D0faruK65$VIU*w(`BVDCp zvE#*WFY)|&TBQsk3l-9oe>YbABQ`zb;y;t4f4$a)#w8_1Q!B&GWi$l!x|~k}neyg* zKa#Zy(M8jfiVB(l4P$j}_M<7JRLK_CJIe&=65)3Di8mJXamnd;H||I`(cpb>A`Nh*eIni&dJ#uOv0ItwsOC=?|;T6MDI(n;R|*(^^d~( zzqk*ldkNGogQaBjHqVk~ECLLeTx*In(+Eva^gJ6aeI|w!z#18le0H{Dk##2=`v%kUiKKj z`8BY!pc!G&8{LsB=8H_h!hM)CbagAtw8#sr5Oxy?@O}765=F(WImNkpnAkfTYMN?2 zOhkR|pkJiTXC`jRHZT|&xO@0GhkQoS6sM?lT%oLG%v)vk{?5_8RAzW7+3P#d`cnIS zTO-AAV}k3Gr(UH*_OCMMUxKVG2-DV!3MiV_`wC=rLx1T{{Hs+gy9OD=X_7~8doB+~ z#y+NE$DMU(`H8}!me-bo^Zw2+^UkLGs@AMRbk@fCH~v;>zke@qli*Y4I#<@{oZb#5 zo}N2Xi!zOUERtSkPP)Hsi)iOajZTatTqnG5b|8@cs^jUsNU+Rw>pDC2XRON_2NsXe zh&qvn972YD(?u8{AWEj<17!-t8AJp)FJE*KFd)LA$FKrQf@cST^?*{;9TVDG;uC}+ z&^2V+7el$=5#1Jv8jhLk;QH-~t{wvx(gk8OJhl4BoNy~jE5;9`V8~Ic1pwcO1l*AD zzMKIZAZjbLFq||$a}XnPhsX;oBufs2A$;Pg-$!BBzD2E+dOQ@!Q-Elz%8DwgId}St z6L?|31kMz)3^^gL&Kk`fEjirr!Q=%!!68COu+S>23VGmsbwmWQ5KkcCt(>jI@p4tr zO9<8sJS{{9V)5vnIVMFx!V6c4To2a*WHJ)I9k(Jw^+fs#`33<9F<8YfF;OCFxr3KJ{5m?<@I4cN%JI;A++4y0rWkjmGhdC)Uv{^T0y3i?* z)wJ#)x)baw_!*ZhiZ|DopqpO54<_zH$wEhm%^$b~cf(!RKkI%W0fSkDDK6}>-|HQY z=O!o&vzp&7a@Q#e*mZE&n9R9aVqWg-0NSM1W}OYI`9NCzU5wMl#e*H#&kGV zDOXHs!(UkXDfB}|CElutIcZ|^8&*7a!uRg7cP+J;RU8$O%0AY+su#oJ4DL@_q{3_$ zi9G)E>tjx2zM1vh<4a3bZ({s!d5w|!{@``L_x7AZc;CxiGU$H&h)-Z>Aj#{IOKtVrWlycL;;GBQvgku}e9yV5wG$~nf8}JrBafss z-t;vQXK`wb<1lpn6faQjVtACpP!~0E^2u@IRDinOkk9wL-zI{*p!Y|;a->dSl`LGY)1s(mU&4^{$fgu$e9&3Y&5j^khm73m= zFth05X#Mvvm(y^)y7Tqe=|C)3Snd#-ZtBG4xv<0Z3t6!Zhb7VUx7yJJ&MCEw>;fO? zYim#vnUr=2o0FqtQ)NVtg6F}h4Q8jt_{BnZ%{y|&`$_AoY^M(!9C^XM$+~xW7Z<(n zixO>mcBZb}_|;VD3Qo|x)>TjQKKna;?rFuS551%*##<&AP2p#t`Z!%NTpFhnBGEHe zO6=xtR2}M{MzOk6ybZ5Q#&`c|dWSHYEmI;_`NMFWURgD#k*WS=pwJbYv*&4rVdgl!Ja> z?}00dRyu}A62cZ^#k*9qsC7S2p2B&i_Auy8w8>~|&c<{;l$p=}G}m<^eTyG@A7@w< ztD^|3b8pPbU7laDu1Kd5tf0LDbwgTwi||!r5fR{l1Bh4}@(BKCxMui92-tTTU6TwE z39bYn@y&)1*uEgd00=QH3pEbeKg^q>_)2p|YoomqewC;kbq)HY7xkw1D>=n$Lc{jC%p)353Ic4=)ij)nc9(b&9zcu$H+&+5t`$A=ht4=x;>p`m(wi5A z02*=tTu`fy-p^5P&O6RKC&I@v5!{s*tQ}wVIr`^3i3ky2vkP7z6~pOc9=?Zj1f7|W z56AX!XCYX>)B?dVk-(Hy3hEEv5=aRE^!vJm!SP5>Ti&@2vfJgvTgSXKF%91x#YCYcy~`=(Fg8?Adib*wnZE?5;b8ej z9c<^@e;&EuIN^@uu})LE)#~BiQip1*twxpLzD(C4+~oR9gGj2q%VxY&`x5!d!-%!- zRVaVU$=&k3vsY>wCwZ!QHa}6BW9-#tjlgyMoD`b0zCmAX%qNW08VYA(KxxqLu7&t3 zBH)fermb@dMCJ*R6ERt|!f*6ji+!Z84~-Tq6$DehF|2mskpEj1Mb!FeRC$C;Ge}~n z3-!TZLV6l8U~TPTdBUzgC+AAOpLhp_R5m}s9rsvHmh4* z{D4=nD8MlXLd*JD!ZPbD`mUa|>58Ml%%L7Lemdpn_Tr1>vNf5KVc}}dIxg_`T~~sS z$$;CNO4*XX@V8Q5mcrmY+Ele;%h7-(|F`{1w8TOTS^FKLLWR7>*4FL^2`BCNAHz_~ zi=liZ>)Y4p3Bu*b(Xi5}i&rxbW$zf{VB7v^OCuc#e}hZUj;OX+5+e=6t=@(eD?q5? 
zecPTYu}mmz@c9}$osw2Ed)JKMDO5&fno9JoY@(aN@uBar6DjB3$)GgX5PQ>%$A%ws zak<65cfTIGZ|Ot%#N&QcgV%!T!N$(4!|<)x^*T)bI*zd9)_JBN4vCqgv3~;q1H=JlX6*TESbfSy68PmE5YP_woe1!n~ryRcuFj@}_hs zGN+7y6=}v$EfJv-l!BZQ8vG9MhG8OC4YfTVNq)i02N`4G*ZJszd$g7 z+eec`0)2wK&=WaqsBV-McBge5cC?8WfOkZ;2axsHZClHKOGn6OzicfK;b|r6fxiOL ze7!Kc*yq1{gy4$MvsUayPUa5+bMSC_q$5*1^P^s^FZ7Duur^t9feSsi2vrkNc!^EI zsSK4A6Dxoqd68VwNcdE7!sND=D_ce2J=LJ{vXGJ46T$ZHwtTLo3n>Ei0kO zqe*yB$^zyA;uJgwq`j3Q+J_*>bA|il!=p*VE;i?5sL?8|QizG=hkxRE03acOISeh& zTSFo~z)q7H^7z8232bbBIXqx&)W`BIN;b?((rN1%52l74juhn2w{CBF6x*598 zjr%EVe`(@VVefD#bfc*WI~yXiGVgZMHvPf%`al>&=IrCJTuba;uJgK zlXWv~{yTMt;0FhvM>a#*6zS`lV$Z{Rk5hf^r`5OhwJ*33RWU5RT(rw?8#D-Rn`-nA zv&J%iRu8wV=z3m7q+)Qp?T4%i_s!1>KR9YT5&}yBn)7RHB0jkQr{NKHkiU;-N#kca3%OQm_&EL$;mG3jU^v~EFoB2m zy-RF9@e1W`RKjb`8xmt(>69lIE1K*SJ5)lj&)rHXzO*(~aA;`9c*7J6*sI_bfE;AF z({S9#o{<^Bkl8!@W42lU@+{sjIK*m$Q0Y>UnT)aaJdjm3iKIx#+uq2un6Ib^$!Kalap~(i4QLC`PS#BzlNHG&$6+Pnjlkj z|M~Hn7VbO#^uv$vX~zP_i<1Fg#7v);ga`w`6bKgt2%r`0alj%3quXf*_q4P1S~9GO z`vjaoI0DVGV5eoc}TTkythX1zn zJsW1hp8xk$RKZmna!N%YuC696lUs1}?PNnJv;}sSahsh;F%OOfuItKaqO z{d$j==Ok0ceF#h6K4IP-3iMv6dt^v}7PG0yP3q6Yl66z~#%ZqZZHAYST6fr^YUdX8 ze6e{*>gp4j-H%hO%!ogwVx$jE;ej{zNCTUNRxWZ5Z64dJ2s!mvH_XU$J)0?lxpeuu zXONM0rmciWT;^(Lb&-9#Q#Pg(AfgDoYLbeZh}lC41I!|!_)gbrQt-hy zr<8@0;Wh{0k7fJP?N%TgMsIEnb5q{S9U=EqNtM^-4CYg8!b0FBsJhK#SSAV2!mo39 zq_CumeVjzq?P6X{m8Um)3vdLMPbRgJtJ#l%7|TC{7d5Vl*)r~{$7_CpXzx8?E%D2) zTq2Hi9Ev)M?jx}sgfFWeZKtMr(9(~SDD-YL4P|Za9xAw0m}tB>?=PuJbUgH>%P}C2 zy*5yn?l2a#*XO5&Q*q6@&aCK=YTi6S;Y8oPt$MzYtKdmeou@OZ5@e0+*o&mcOCH>X zWAWpg)BLf_?VV1R*`?!WpK2KR09<&d;f;`}0gZ4;@Kd0Bb9HrTmT^G}lAH7N&6ix! z`G<#&dk-lgc}M!%)_BS!&unY^;k$ZVz*2)iDHh?2O3+0so*oI7^fS{&jQW7$(b14J z;DvlsAfE<Z7cvsX($CnMeZ0z$UxQac$Mt#J(HHU+= z4+rQ>-7ji@ZxFjy{xsA`P$5@-^${}8na+^+UxTBu zOTo==^P)RjYdwm~{VZO;aW?Fagx&T7S?aaOX59~T0Z8GZ=DEO3hc!7An!pLX=1!G6 zSjOjeX$>0p*z_#k>8qlQmU#Eg$u>?WypBvUj`aLcQ-6QHKaOgb2&k>b7I9<5e@h!a zbMd#Ck<=FI=^9ZQ=iz+5{V7{xqTma1^Mc0<|Kv{(!!!o-zpHXJ-OvuW7P@sIyiJY$ z{Va$fM-P`=GTL56Thf5tsC6epZ7`1wbVx4m5=8*xO?Y z?+@zqx$PKqAHH5Mo?-A&%{SOxGRv z4>WQL*9B|rX4d=D_;^T&X&YowvuU{iwprWoWdNhbDNjXKJtga6=EB#Ow$*6XU^ZM? zS5WhTfH)T}X3T7rU(NhqQE{2IdGA|`jp=^IK`7_t1Ej*P>5v8p4@>GV4hKrw9RSO4 z!|LMr1K^qGFg^f|oLK098VVZ60~LI?Z%IK;CFkE8@xCQM&TdqSO%L5u=R+PP1XBvA zCW+-$u1P4qN-xXl1$@$kuuRp~Fw)Ga*YEz&p$$3O06Petz$*{|M8B1#Rjc2e+X{)5 zXa{8m9iz7@^EcIN}H3uyb!LIUOQOeOm=WVS~?M+GVltJdFz z>UoK$3m0V&-JgWQ&P^-Wz#4|ZdUrrCVTb9$#M|wXU=1J)dHyR7!5A{q6aoIDw}hp4 zXXe9$-`X}D{9>ZJz(%fMEnPb&@o`bz)YR1DbP_i+x%Anqr5X<|y9^u+yH0g2Q zzFI^sB_=^qnlEnFM+;f^?@|YnKx38z34KOtfq_JcPOkyO@90#z&8C%ucL)YX{?flglE;}Td zktwWorOvPQY?=DfJ+v)DWU&)DyH%r(reKUZw*&jDS5J3j49{u5o)14iq54wjQQD%N zz?$o0&serDda9qm5P_TfQ2tB1*sZbEhw`f z3u~;& z6uKNuW53x1!`_`#L>N#S1Vc@?zuZYUVjzFsvSiqMUl8Te5^qrLd0S*;K#m+O>^P(# zl17nAx*Vr7!+$C(lE#0d?eDOkux69�nS3h>q)H`)sC+>CT=m(VLCDzlPa-h$t^! 
zTNaUNQ90YytoLQzo^@Fp9?E ziLNb9B#tgf489Ug8N&rK@ZA@oSV0y8z5oze-|$wYG$C75EI2C& zz6h-?$qkMxA|ZkpTwUuq1jY5H>hi@1!3|MtF<;>lpy5Gd*nIwL|o z$TN*Rt+aLvN&YawpkA3eKAA5z6iN=847x4pixT5m{-jS;3iGo%sn~_*v=bG|&)aam zbJ8Zu1uhBuJnn>k>%Yn=~4<|f&)SB_>+JJ9_0LyjH-ZAt` z2{EHibWYyt39(eW?%xkEUIhNI0Wed@TEZRU1wR0Efy`H;hi5*+63_24272K|8l z4NnRPMPe4h36~b~m;i|oYY~z(VCH2OC>376b?)w&>?vhA^NrYFF&Z(@kR^PZM6g!$ z)d$ft<~TYX3#3>D92kin$6K8!dLoaSRE+m+$@r$5#~D)(FNyy0N4yu({KydtYexD_ zp*<6(xcE$?N_r|5cmdw-Xx{;^lghSgivQTJ35o4frY z{H&p8-Rb247U_L-mUD&HH9k7TnSVMTf)#K_S_ktZfQAHS}aU@_SL>~qNJ-0 z*b=K~ZlX6XO)V`% zbrPcegA#1d*X$8R&2Iz)+K8;QT0_^Cmrn^;-B2G>s60D;WzWNj2B_?;Z=ysiIrbzc z!R5sK;oP-M`gN_>*@avTFV}F;kEWEMo}d-61zNIny!-aD`qux}pP2LV0-F-nda1H& zZn58&A!bW>EQw*#`A0#8Jj#36v}M7fj|BiEuT=?X<>9i#Lcb`gyTQ;49BnUvv_*n% ze47OI_*t9wepOeQT`jjRQ}gg@v{)qNOqVw1EV-tU(?`_L!TZ>c?5OU8+3)a8){CnL z-C9vGnY4^9o_?~HU^MAFZ^g%#nanr-*=t9R7balj)2YZ&sTqOQNCw(Qr{T@@jFbDO zt_1ry`KtM^y|I#Hj~Vj^${7R{PXRKXk^T%=cOA8?uY? zYu2)`!KVtB+wt+`f z5e+qdDo&zehb;Oh(C)@9Wl##U%c8E&wrtk+E8W4 z+xT@&$|CVDfA3{7b8{EPpP?5!Gc8A_R&RhZwL{jOywrHTHan0BU2$ zmV3YNs6OPq&e(c01THf(HC{DGDFRSBI62~5kzC-^0FDza@cMX|WX5zSv*;R%<6|be z7uc}`ftAa?*sMi%>r<-x1)PpA1j%eI`yKI7Oiav4>Q%xQ(VAB*e3X@%pio@F#7mKbR()y+=F2I^~bqttLW;Zio-8d+!Qp2DUUeHqeHXj zE>lie*NqEVj@LZklq^00Yj3G9c$zUpFK?wyq<;KL@i{z?_PFAwhCO0V-^pg8yv!m< zMbn&9Guzz7Mz^Yx^U3n}(k?Eq{NCm<9quLHt&8ZRDFC@IW7#6#e;6If4*4+knEcCH z4%Y3=@dz*F@vs9sv5?^DQ#NO3mvMZ6SPRrTz+Kf>v^1-m<~1EcR`mvH5S`&&*zY^P zlWC#~)BQJxMs-eJB0w77#d#m@hpzCOQ{3TAk1?$e9_S>5YL2!?m{;4awa_M`LEL5G z)2*=}bRIGue;2pfmc%79QD>hrTbJD3Odl=6GHXq(oKXf^-bTf1?%8Fqpm~9zC_)*p z$uV?I1~Vf0cZ)FdvZauZ<#=(S#Jl$%9X?HZiV?C%K6R}&kFh?(ds+{_UA!o4pBQW& zGot2x`P@6TzaqN|ZA*|e2{`9Dfr49+-hud6yjxLk z>bneOn!O);3`M%c)QxL?i>OKDFo*ZTt9BiPO*fcfm5;0Z$B9iKeDQ+Om`y? zoChs0`>5aT-K2ZzdPq~n8g)P)X9-ZB2Fh#b(bz(AWbnFV2~RdQnC~{ewaDnsZuYa~ zpCNvkzaBlgPDwa%Yza&~$ieQjM^#v_VYxqyzbt2&k$~QuoUh1s3Es>Vc)j7WPONY< zRiTm)mF!r!VF{3xPCS`_EXg*nF+^|XbXqOdg4>t!=N?xD$O)R*%y%+NF5=?0Qf{yJ z6HvEl)UDeHJe&@7?rk-e&v0=c_cFeUP^K*N9{GNJ$?gBfWb{H3)qclkT-D{tB=mLZ z6s*D`CgwQ*=T3Kf9>NSc%Mm{%8G%1Rcqf8UisV|Bp4Ic{L23DhADHD&&kPq983{~6ToS=8&0DTySx+FpwAKr#3q{h}B z27U}to$qH)T+Bp@NVv0|HbGw$J4|?MYIN8Z-t=(s5Wmm#Ij{WbzQtSbK5hRY)4;BM z^;p`qS$dsCb9BxL()tAa$ejNAEyJKXAH55RJb-mNj6MBlFFDhYS zr~gn>LG6=h@Zrn1be1p=C%t(J^x$!W>BoA#tCWLa?+lx|p^2^(4q=ZiXr#Nhjx|ci zQMm(x$X5>c1qWFdE8;A9q5;IZ-7@g-t)tm*^k4G(Q{qq1gBxQ+Ic^W<)fwTRAW*Zl zy_$o}Yb^mAXt2=ca5{$9MY}3_T}x2vLUdHA&1E<0G+6^>cjt^lmucn^X32K$&yc%^ zi>&xmRnI3$g&;XKp9@T|u=`n+_cB#K@Iy|#iGZWbG3oK0jvZ*48=L)lj)auo}~p!>t3F%=g%Ox0Oen=gD)Zh~_>)_CEm#uVy?!mEthKDF1wBWabCIY*LU z`m|MFG30Gk`#3y$=qASV_#muw&o-T+=~i_fT$CcPY4Z-IJ7FV(n6`j?cXRJz$)Fm< znrKx&kd&->=B0zI=U{O}VA;>-bjUa_v))W-`MCE;6Gu%@&`8<#{dJyK1DEOiFQSkL zt&#N64>kkw!iWCr!z}y=twpx?PdV0_k+G(>=*O%_4Mb5o(yo^UoFHawp*hq>vQ5<30F3e0ShGx(E7$26;g1<3c&9xdzB}xJ&-?HbsU3G1Lvh3;Q@o?Vd z(O=S3bWrH>iZOu)G#9#uBXqZVh_H(ZzI}=03EkW9xCtY`x9%r+@KowVdQ*EHdaPT< zm<$7g`1yX=MAq}{Zzv0jc*NWQv^0L<2lW@R&!3?Mutt$iTnWhqCwbOSHRx7ynk3;eq$ay?cm&Y=kZ%;F{ZhGYtwkQ+=OW2|EZ?NL?(%YxoS9x@Y{>* zdqwcTUTT%W)JR^g(;l;7uJprJQIfEO0kg+IUB14OMf==KS!Nk#<=T&Et`C>J9H#d& z-sc61vjSH+;m6~`XuaDYHKU(azk^AG&tARV==Uk6@Q{&1M^z6_c=_s_G zPQKR`+1}A(vwG4u0pnsXa#^>HBRv20f5y%h&L(>h0-$B@Og-RbHSM!Sb_>%9`BX18 zxlJ3;4`%9w0c?AMPN&p1M#a8#$w7L21j5Q(*;ldCQIokex!dZ6;*x#&MEkg286E*& z01?V9lkCj|keS&Tc)v#HMUXYKi_vlHrU?NFVnJCY>ooNZI_XvIJ-A}4P#`jUI*9L6 zwe@T&6#$VwA-Nl1?Y+Auu#%-OBOavNH!Yqyo^0}ohpgehO$isQI$NLhP&{MGu5%0Y zxfiGl9-gWuMkzlVP@w>uOqM@MqVJE5o)c5qGyM2Fq!q3BHJ~F}Q_YCc`gYBMX{|Z= ze!12%xg$EZ)cy*|>OLh|h#`KI`sUKdAig`?jKgO>O2w#hZ%pBr;s9hmvh1XB!-*x#n*@nKHhi`zo-r!NfbD6wm(i2N0s 
zb8eMe8A)yInR;~0bGMx=fl5~VpWtZ~uUy^KR?vzzo0j`wLit&nw6{cP2%X{bQ#e5fmn0CS828Z)=vjESF2o*HarRieo}724$u_5M5CXP`;NVJ${OCE7Lq@wW<4Sq)^`< zLHDXvS-sChU2I(M-BZ=|-h*)kV}@BNb!le=Lm+>-eYU-S-pD5`Y_hFHhX=8gi!DYO z6%8`7i>uhomA=fPWvrxQmDBfc)Gr8HVZoUew+ zRij`=ag$$f*0XJrD908?>7W?typ?Q_P>f4`8ztF4wR*OZ`jn?p$37aJq`iSpNC6*u zsfpBZ&;8r(M0h{D^Dj2$E!WUF)$c}9w%(`|GqUhxnz0%iA+CqNI+rc#KR5lHlZkT< zgw@{F$uO;Jyu0Be{A^kbC>B@svY(_vaBZ+w!%V&din}R!Z%wU> zJ+=2GWVF4*#d<*T(#nz>czRfj;v)J!3}qc9ZdN_z=jHTa03Zjl1-E!RjD@t5Ti&;1 zZ+lc({A_w^D%4DQWRwu}^Ya{-ExR?*vLp1H{ffX@Yz{)YN2Hf;U;u)or~v~m>gO!j;^Fk=@&b2wAB{)S)nprPcRi2>xV=?$e&v!^4A0u~tH4;Qxx|dvuV^~YY!;{jyF_A>!nX2JN{bM*AdMw-caibKY48kO(q-?8Y**u8-!y~&Wi&tL40%@?S7$(EBO zj3h#z^LW+mUDGUEd9|}pqrUfdk!~_9`)&i8e(7I$abWSP4PUCR&L2Nd_N$?fY_=BF zHCoV%v+wABVqK|$|FGk;A~C-4#r*ff;w&+@B_SDEvtf7mH)PDKgIR~Wi(S=&5g4A6 zuFqqY)Bcp|by!f4`ljvK#up$jFAoM5q3d-SlbX5=>vgPI`IXJ(XyNQ}IEn8Z29A*= z=vJ;;qElfyT)h8*2IlrOBmotCESVEOx9Q#=jvkf;SS3y$FG_krhcCSDo$i9`?`w{* zxAFjr2^;J^CZ5|-=;3vbkOtmFDp1KWMHu- zB~{aH{K<;;=xnHR8_D0zOv~s9I_UoWHW*mI_11AOg=Ufu`f&0qF)7x5^?O(_&!(_J zWYZda>=yIQGEH&1xKwwNKUlvRkc9(S2^1y%I=CiIHA*jsc^(-kx)-O@oz zMkKHqgCW|?%RQP?;{?T zpKNk_mo}>R@6ofrrFcG;?HnA=D-1PTs~8s6tar?#4UQ1HQWF#V?o|VW1oOPx&Akfp^Yhzt{|Iw%c2X!rbTv$> zeNtG`DmhQ(^vB5*_QM1M&^;|IBerzt)hc0;-Ed;X0|kYajcEek)x7N1O*8b!bbpH; z6pd$^3gzhcJbY|g1CxTOTo(RJn=;;y341#~Y%zuat>?>_%PD+UF+Y#J9wt1)CgkFO z-b@pCD4Z24urY0A{v-$UtR}elknHCE)I|5^qqFlkw=nE1CTO4Zkg)3Yk?|Qd9{3)Z zQ{vGnsXw}%bDY4#!2L$D_V1027e90oo80R%qhvuTDY^bCv1T3hTxIZeM4XNXG|XoC zO5v|Uq~}50OS@jTX_72=MGI%Cgah{7Nz-Pnv-*-W$q=vwy9Qzupu~)-0eN#DI2t9H zBlxuNizR${%qJQ9ic7YaF+Bg zBfcY3*i{;&I5-MT&9K@qdqZbON4!)H zOryrylx_12r-Gjvj+-4E9AIm_dv!Q3Bqa1Hl(5WfJjZ3pt-JZ#r1|4v&GKB81pzkp zo8Yy*$pRP?&E+c>JUwiKVPGZACZD$57Gc;Fo}T`xU5c->vVbQvBoF~43bB-Q&a=wq5i3=Ey8>R#n6m_s*P>Lfo1FRRKFQb)K`tR zfCaqOS1-51;?K?x#0yeA%mRZNo+upA3!bia)yj&(1J~B2|1*$!p)9iV>o$9kxWue-nqdUDX0h@;-GQ$N)(@AcSd^tnSe7N_+fk6M!1-~oL zZ@d^acT6 zQ%oKSO2(jElB&|y%wGX^exUxB!dtDyM$d)KsSJ*7l!T|}cfaPw-n(O(L(XHF|07Vb zZIm)u_DyPY`H#L3IP5+350gw`8i7MCnV{@qc$ihC%zuHK_D@9kV+TKh$0j8=cl+ja z9cF_oOa_U!sY5o$0(VunnN>$dnPG@5TwHjq-!O#-Rs*bJu#p;9n1TEYOLql+f3aJY5$>#Rzs0r8`SpOhJ#2y>J?W_i zn|4M6M6YLXfQ9bqU{Z-bmZukvGBL1LGlYYkIa=M=S_kC&W>6d3hp}63TJ+g>)+Ybo z*EAXX$_c}+s%U{ZcTEmaTFl64afRV|p@+S&?$4S)aaoE-dD zuLMs!aF3f{4ON41d2E(E+acucJLwMFLx;;P%gx5q#o8K0SDSIV7^tWu{Ej=f=i9PL zyx92oNNLIiDzK-aqM(>B);novXuuG0v`cuN*>be0KgsS!ugh5TU3S7Yj$53}Gs~c7 zl(90bw@nUrVXf}%@9!E6@Oqtv>L5T4aNgvbd6Byqhx-yelhN`1*Mt)QyLrA>mCsX> z75E+yz|S+7ETCF4J)FQn!^&z4du9xI#aeiG1i8(0@!}u(OqlAd@w&faGwhnNtS&ES zPnJ>PSy?S<2A7t*y@E~c|B?l;6qf@|o)onMcJUm&6R40nWW8Nx)VvBLl1)|ugTD|{QvLoL zQ}mzdin$>C9~)5Ip6e0*j}CoSq4m-m8ygm!%}%79r0vq{dDEwQwsjxbCmufatG{y35wQDOQ90h)djzm zcA&^V#B%`$QGppsUNMtd(*sy<&QYuu)s*Aix3qLbQ@91F==c^_@Nsa`^7lVK%Qbx) z!v789VMK&Y*290+5(lv^Ll8<*vgQ9rS;zx z`=9GIO2Yd$Kd3|00gC$CTA*!{NDL=;`T0TCvW=>5{`vX;x#ZI+*OX;`0*^~EsboGE zRdGwCcJQbR_P@d6sEme4T1AHBV7~cRP~T>HLe$_;fFizcfbx}&aIv4E509=3L43c& zfD_0N z#lle|rR2|PhCfP?=Bq0g)pLR*A*pZTEU%6HET2l)asSK3|1+0)u|NAYU&$Takjjn* zQli|ZB=7%_eCI?vvs3)cgrxnqZsNQSsVx6oDN1wcPIgP|>1K2wt_Cy3r?OC@NI~*! 
zPS&JS=jQagd?F2FkZILN82##l-WmJ5QAvd5q?E*oK{~VgdwZmY7@J+Y-?Zk)3jHz) ze{ptMvOMw;YfxOfYqpZZEucO;5w;3{-b+EACFOrczTx`CG?@y))^ko9@@J&RY0WyLozF(nC6+P5l_`eKmT3I7FTD}e{&oZP-ct42DLem=QVksGV^wrsa(v~VV zDXS^bt9vVSa_`bIh5A@kx}rFjNXI;nIM{ zefKZs^Y|;E?tLF$87`7T#uXHViAzys@lMkxvQu1EoOrl_;{DICW%+mPRKJ<&yCC?E z1b_cISmC0Nl?M++tUe9JTLKLdtx=+CCN)eCF<4|8RZL~7NoYEzJnfJ`XJ zXOKAaF7!Bjy+l&YtKcHx#x$K4Zbe2nHMcvwtI&+~ro@gB!Ff6Q?K!xek>UVE)`ook&1wwHMugTUz_BTsN( z_bZ5PVu|FG4*N(ffTyKlAm@8mh+fPx%48tvw%^E2arh!%S;o7(i2yLZdnHaleWzGa z`eywLV{=*shvtIk*_GH`>;iiFRb{TEYkd#%rBWHceo!9#(TGFSBIJ{fYT4 zONq0Q;oIxf&H2@?F13d4{R~EvzfS_bhNQL*@dnc&%MfNKDt0Uys1#ximETQB0j<@n zA&2KC8%5QBw*X3EVyFuE6Yhbk*56BW;^LEA{D2|5Xt{+wLpk zMPvob209?b8IKMaR0w<*C*R1pq<(rZB%DJ9qZ9E0O`gGU@WOym z;P0)B-2v-@-u(J_et8*iGG!^A+TT1hF(JasyB{(KGBmdL_K27S1zi@J{XmpwzT7m( zu(Wj8_3v@Dy`Hso{&lfF&;Cr!{~ss%ODJ5yn6Hsv>!YerG#gl>dA| z+V7&Zt<9j^sz31EyOH7Hf{DK#a}Dw9g9EAy`4<(o{sIodh4;5THuvYnbTuyKyT}wY zuv=KW^4x5*0s`ExzC+oQr5c><(mN4=k%(B47#mSZsz^Xv8Y!G?c0Zj~P~dl0e0(d6 z6eSj~*7H*}! zIqe;I5b0t9&%NT~<9Tpys(Xr3+r`BNT_x@ov)cQ=_75Jiimms-7CIoZ&LSaZS&M-n z^zUJESwz2DWE_`#qUI_fJ!V%0=g@kAlxPa8^EnC0kCss`|55L5+CM9^KqFAU-3^m) zB@Iq!@EAuG>sVV{LKow&mrg!31$kUG_Z`+I}scajP0*% zu44tF5hljHO-b~6)U4=x8N4KTMZgv;ecbJZhk0+68QTHXAlU|CC2S4KgQAk#wqv53 zgta17l9L)F=hjU!S0fhKK;E!+~ae) zciqVx`Depur|Qmd9COERz_fVEJ%-V zYr=m;#Mz&DJ#0I(u|2|MdU_(0VQ*n;@n*L$_1f3D{oCt3>~BrtwPPsk%f_Y^qy0Cb zYhk+_u=mUo$_1GcLQM%@bha4Brnfz3KYyP#@T(WZW$%-s_YjtnViOUO;apt_ey<Ic=IdA40GJ0` zodQJ)6j>RhQwj0|q+~dOPpk%sFaOw)lb19Ljjwj0Nv{{BE_rW)1ykf}0U6%gdJdg` zJqpmv&{0Y^3WEuu_E9#uCzuhG3dnWDCQ7j*4ugqm5CB{e$)=aN5key_@MZZ*s3^V= z6xg7LEggF0LYa|#C=5ER=fGwJJpC-yjuSB&U{Yw)5V80^^q#GX6VN*wSHVyS?p7&yorX;edMAur&#W#6d;4&FfgBYTif3Uv6`T%7(#vvjAu}&wkaImw$$3f_XgM)PgQ_T0SoCvEEIPxbZ z0DK|96v`WYOxt!vQbut``3VCkg4P3HQ95W`bV8PoEE6>jBn&K5h#6@lm#ZS8!gB>D zqCLQpfw=~x&uttm)L-rTpfEbVPO(+VN#J{ue8OUdEy8Wlli?RzJdSgan8ohlJAa4J z)iJ7Pm5@g0Q%J}K`W>;Pq-3|PShC``AMdaO4t`*h2De&NK&<8)&p~BAK|w*tr%Y`_ zc5yDQBEu$c`J6*A#_Q_p0-ij10;<_!K4xcU7X=k)`z&lh`Q$>)VvNU~-QD8gZ$2%PwDU0fJF(a1-(( zZ=g%?D!6t)nOkatk3WC@JY?RQEYAuG3i4YY9UYYeqft;nrDnx zMV5dvwkQPT;lqa@ex;tTzWO`f7K|-bRHB~V@9OOh3lArMRt{QjP-0Luf7F<7XSQy{ zq&!c(pg&cDGgdxTX?^LfPBk(;-N@Cok_m@2*bIzY!6@C?!()G<)CiRP1#b<+S+~au z+2VTy1q6r~rR{i&?HwN_DyF5S1rgDI=WS4A1+gIzcYW&VDGFo_X&*hA0Dn-woB5RI zGX&9^l614&xMJs_frxy<^A1DhgE0N@OClhuYqg3DQOBNJ*2|94t~?4g@+PKV5ToJe zuk!A+@KgFa#s1jgq@Q=57HU!1l;}=S7mx)s58yIw(T(HQ|U!JK{(6=7FO(ZSDUy8uR48m0$?43X z8=uN6?jL9_@B=GR-JJ0sBIsXFqk8oZ^O!Jz2q<{e>kEgtVt7Map!onIu)7L+#)~GD zgq-PHUOhzd7_$uC-N5w4l3caY#TW&PfhiG}NDhrUZ-O(rC_oKiR3PB!f#!t6b};df zN+>Hh@Em3hM^}Bc375yk)PzD%(oFO?kPB!b@{$gsjID@zj>4oOz=^la&@7`NkJQ(& zL=vHxQFIwBgXE$_J6I3kWCx%VN>JK5RjR1&hhzn8^tC3%)@b`1aH(+206l<~goq%| zwf6=UgnEj8W(+H%P5>Hl8#g8w#n3`!G*-BOp#D&AtyMa1OfwWs2H7X~?q?;9R&XLS zv3X&j7X?K-cBtmUzQSd4vv;?Tt7yUJf+DdYjK=nT{r>>GbBRTo+n|Cth=N4L#Ka^d zXgB-a-dvluhkfoc6PD{ng6gqT6_%w8KP)Qjr@lYSQ3V$&yO@|xv8LSf=MHoAAiK+b zce?6z*7K>UDez#X(ppVNC$aVQk5~6UyYA0Rv9Pd6Nf~@qiHW43qN;UX2IbHvzsd#; z4i2`qwt{sPoUhKKldo4^SL)Rl-&%DUMt3=644pYji%Co;nG)pQ#3~<8!j} znw<<3W(O6lLDlI$Cnr4Y>|g~QsAhWq^V!7(i2G<4Yd$8?*VnIhm@81t$0BCfNdZ9g zdTDvN!g_=i0EOIf2nk=kd7P;8h>NsF=5McO%qyK#Smg8-_P?FpCAhIP0#%*=!8CAF(fR? 
z;Y?(=hW@fca=vA1Nj5n1yN7G&PuZi9y+B`$dX`>`X!bmV?E6e8yGkgJKplcKnr8vy zlWO89re^ez?opzHtR@6rzJCxYlRTE=dHHd_S=!ss@_S-ygkzuA&q80`$TKg;@X6V7 z%Uj{%Y4;7$BH8qNO}~shcN=8mNmKUw$NmEZfH^xgXeArM7Toi~HUUIB0LUSyG^{cC zumQe}3lOhd!VxRsv8)65!fG{IhQKE22FSzjmYp~^1(hN?+ z@DRBY4j-(F>$=b&WDrt@?$u+|os8UN`!>rKf!D>5CFIAkzWx@AUh;uU=pf?@NIf7H zL7!Iiw6`q+GZC8ra)nGpJvOdXYqJHYBlc;nz*>GwCejyqf5cRwyQ$E^kEg&xl^bIx zLKQsV0BaQfKm`M^QVDKerImnzi-s7rk(B!IZ`4#Ts8c)+CPM8OnwuH}zo+UJGf23D z)1hd{s<*4olbJJQX^eUDZeUM$x1DfuXlQ79dU|1@Ex5-(I?cX=>Yt!%j!(%0s$WCc zm2)FPLIkJ?!0kXQ3`{$i3eiBwC@AV18ej)UN1>6C*clVs_;@KTvG|UT`$a`u(OqW5 z%mMz3n;qAS>z23)LV3{5!1_pS6z(D9@5-=ftb$!jNUu(?xfSbFIp=Hd* zjc1XLgd{2{$wp=cOvqwkW9MdPV_;ys|L`F=b7K)?_N!x{CMHfnwm>k8`@*1)m>0aj|CMlds_R46fq5!F;t`25{R10Y(r?9ouA)1-FtY z-DowF{`UlUZskwNhpv)9H_G(wS}=aBPowcME*}mBb4K2|iiW7~kjjm2wafL*Iva7= z&4WEp*C0)EIT`t)A1GJH&kxB0t-dE<0)fSfj_-5jvz~svJ|xwkd1TDRUAWEX2u;U& zg**#&ym}|p$=%BjD|7eGs93vd&o^z6asDlX|3z3UF5F`%_0ycweRo*1hFtiS&3T32 z$LIb5d`d-gE!yBl@XyO8c4R%D1BI26zx}CXvutEr5;ga`o)$o3cg!plkhXB530Giq z$oN80#|Op`X}88X!d8eO3Lo_vh_Lx#1i4**fk^}%=D4Hb1IPA-J(cjM$HeGhdWC?> zod_Z}BO1rLi~f3Cu~X$AffI3anHsDsB!Qf?A7NyTJ|_z1*`oW^P;f0|ogVrEaoZr- zSqUX^R_k%aaaQ95s{GLJhs+qgc<_a@ ze)4Vx7%hy9jDW5ecyN7vUGe>UiI_#*p~={4LtTCSyvG8#WnAv>%{73Z0vTI8oSc6h z9oA@T(ET|v+j|S9pO3dE7Mgrc!Nj4fIDJW}B-8KcO{!G{BH<%hKL<7MTKG z)yJ(@9(uTY0!)ffK=YobRSkRH-r479O(2Q`=)k^2yq+?DI?$=WiIp?GI~)+NFR!L* zbI6SKUsPr0qZ+1_AQR+?V-KCx`hW?S(lho(pfIcTyMbX&MjAqH-^}f$9_jQzL-Qf3_3TJK9 zCEsdHv2iQ6+%OK_J;3gkaW7gK!F6;{kBzKb@&a2BX{^*9^6Sxas@%vzX3PUdo%%Pc zq_~?OYiKUJ5>mXOI*`u5G<2J`a(f?-oq%jYJpd42N&R-ET^c`O-9QbY6tG+y61KmS zXf6@su_u_C2eUyS=>{SqQUj*wJ3C*12B|HSa@)23FsUzBIeic0*cciby1NSliHa7& z$)yb3Y;3t;mI8F0b4TiGYjFaj;^V0Sql%3#UEGa>m$y`}#zC*zE?=~9 zc6JtxMz^*;%~9Q4cA2{S3A93>FKZk5fRAsm`S;Hc_~A5<#Qm>o^?6E-TKr8+G7U?? z3i-EnSD@~BAk)+5&%0L-IXMiCcEOK=DL~7CG|-!NUT$9>NY`Q~!+Q6_N(e7drPNT$ zhh+`CSVl%hcJ_l<`PJRsUCF@fDCIQCO_0#Cy{!i}DMhulwf*(HIl$e%VoIGOHg`Rz zG;>;y5bK?;-|y`N2n2G1H;eZn6Fh~mu=`-fV~$unL{N~Ng~hj)mX)OLkJ;u;lkWfSSGmKTQ0CO8*Fw87|Wp;vEfrq;HZE#$Y_ z5UPSDece|3%~_^bKgA9cC>ib3#H-=Ug?s#N zE%DGvh%4#Xn$I&goggi)9<{2LAKRO6wz0OpnG$MRhMj-G-_+gzg!M|V!HhB?#{Z_7 z!{sJ;r{Uf$bsI#vNkp&Cd;!yX=FP8}nSL&#+8S{7%ekisDA8Tr+;^TUQM^~{b^Eo? z;BAl36h=#0#e|f_EQO2cZ+-qNJp%g4$7xmmF3{VUzmrDcUqC&(*)HoJKwYzgNZ{4G zPfRkrRr>d}3EI5Y`*YW6Y*s^~!0qCxNVuA&g)|8c0YITqvRzz6>eiH8P$-j<@2l=@ zED0Es3=*C}s0=r~O4D?muKWo)v1nkW0G_c72_YFYFiX_6?E#$i;96s5nKwO;rqKD< z=fc*T3QwQIJsR{IwCDkL6y_vi5kAr#?&6YF;0rnldcTAGA4Q&fFKfMB>9#yK`YLnq zhzUem?3HHR;3NPVhXjK9{5Dl5xLNBHrVe^XahJ8;AV&X$l$2R;5`vrX zU%mrq>Gr3qwn17S5uK##ilG3uY)J_?{IzGNyo9je>B?QcTdV6_E zfskD$yM>wAq~O=kkPz?FJ^L!%-IrYenEM4e8l)5y-zK(q^A}f>L>w8YsI01V!CCHC z{N>9R&|CHeX<*T8%JTuY*PyEm!al22c|CfOj4})i4D$^hj-hR;hK%*L6D9IFs$^}T zI|BY2SmOcu^58LeFg&bUtf_aCS;EWyH*Xc1pIvlSC3dYH*H&EKr9JWZv5kUZh1KiA z8cV;b$IMnE5!{RtQoCm47x-^WKlbSfbLO*AM)v@y>`$If7Z)Pi)))r5OACyOx>uH7 zhc%Y%1?i_PqNU0|N_qEO3a%czQoq!BV;HQOmA}5ta>tl|^R0kDv|uQ&UzVy#ucRAk zxNKnKyPw%{ss>6=w$2+HvNF%QR->1un+11bRm`mq%hTC`IRolrLL74)K3s zH4Q7U%PU>^j7v~Q-iS62wP8Mfy>!yIe&M$DTm4o}JyTj{b)^6|G_EzFkP+wa{Ljab zb+GV%=_Mwiw%L|Kb%)ekW&0bUe&2N1AN;>n3js}?7Ke8`)b_qyf}?W3UQf*2qy$LA z@W*Co7>~e5P9VfkW*u>gaG0o!nI|)Pm;GbYWkv%O=A9GMHC|BcerV3D! 
z9+Lv2&MeJrtj#cBCq6&0#1E ze&%sK4GyrCG($fyPPH)6-Fm=8WdtreX{5@kJ?N^DB^CzM8~unkTjM#euv#XbEh&LU zz7gGzHz~^WN^zzJ=HsZ_oWkF}Oym|Dpa+}UGAprh5R?Y|RoeV3cOra*P6$vTB`eRy z#$CFI^E0!oX+^6hu5ZgqbHP@r9WUPJ*3yx%Bj_JvbDx6k$9l3RzfpO1GvA2&{UYvw zF|B(~+7eNwZ&^OJZD87kog-yhcp8JHE#1xcg5u{ty^4b@-$+~EyIjtmeK|Y&way?F zM7_egnmyCDxai{DDNLL%&q8i}oh`7tkUiihyW$8kiiA2)1AWuEKM1H}ax>oIhe_ky zZ4YeD=&6+)tbb6tpac9To9kb{^-aCw&;O$Zp!li6mRN3|Y6hIf_E5^uHJP2RM}gXn zO-R2Ot%PkfWT#xS?WO1(5Lr=`=cybI+bvxVFEFD}N$ zeeEwlNFn5t`X_t&t?zWQKO2>_2Pv1_us-|t^rBFJ@XOb{xQv%rSDum564+38SFYcI zBxz`cfUu)19TdB3@`p^dGR7IAv1YXUS%>F3qMPaL#xo(J+VdjjIiI7Qv(i&yg#yc) zKq?Qx^(K0Bw5%WJLJ+Nb-cLVcIx-^90akaK-Z zw6KdoPT{WzfcAY2Zz;dZCHP6b4dm)Z=d|3sD%q#P z`mM>G8o(gTp+Z%v4w?X&q$ld0_>@P!l{yb+CMd__KAP==-XNP)UmOViNh{%a*h3X> z9>bDM$Md8P!r$Cz3)NHQxt$>N=&qDZ?lCF!{qoEdf&AWG)w*vN=liC}@31kBis_t0 zI~QBX$x!(|5YboD*u1H`MtOAkHkfnlQkFrL-Ez%jUd&i&CqrgyCXwIBsY|Lcd*@uo zsu48oSIT>3KD~-Z?@%*r@!7$X>OgiMLzJK# zlX_?#l(N_as)-| z$qoB)o2^DRm+)X(pPmdL`mc8kB+sQ}piCu4c(;s;&EAi)2n;r+9{`zp$Hsp>(xzIe zSGj6sA@~REv*na9*OHNY87lozfBT%ixOJG1#7^_^XN)|umEr!ePLb(a*8^tSTqUq9 z+~%*{5DgjT5wEr4ac60j$LKFK_yY2yRHPthC9`g`%luZRK<{FsUp}UoZt`~@uQR9~ zIGf#dZMVE}_FIgEPHQ<9rqg^&JJw0XRW|5Dki<3A8y&%vbP#+s zx)f>EtTNQZ)g$5L$(@+BQp?!5xcIGfv`kLI!|k!5YPiUqmQF0(lf1J@^#5Ew-b!&* zSu+gA^SZu>PVA$#d=82h1G_*c+{@P>$@c1t*(sr-s8bJv?dBQ3Y=LL!D7QAh+x2vF zegTj3T@`ca%W~X{``?TyakrDDj)u((^j3g5SLu(=m!?A~x!xifG4@n_Js6$%Y@^-d z<+LkVi#1m~Qlj2rtv2!~iI5YO+C8!m*KrB53sPsz&U|q`DKt7P8NQG9aw&o8^)fwh z2{C&ty~EbCbskw17SCQsZPu(?%hO1TjPOYycu6r42{Y-vHhlNOP}rGG~UP4 zV5)7Rrk$5JbO86SY{$)3{bQy&^Z*6`97g_HO&LAj0mL7=Ot5W3e)_@YHXD$?Oe`sv zh)=)auhCQG#RuHlx-zNj@2!tS4zj`HIQ8O74Logn=c0&%LTrytoe#n$$~0S}@WiLH zJhRe`Fv+pIrdH>0-t+U=U%8=hrOjHEq*@BLy)VmFJzLqB+h;Lhue2Q(aoaXcuwW|e zh!ltdb2>Oz&bPI|gV`6ugOaldaEEd4n_2~&5IUTwh20A)15~dtF!GZI^@i!b7Y;sS z-#Uwef@u$*7)N(HSON<@GEH}M)3<@ zW)+G*hNvMf&21(Geq_lX=Wl#)%%5WpIl}l3@v;5 ziQD+kpIMS&p+mR7*`-%NR+bB&g|+_n6lF-5f`K|`&DZaGv~6B&BREOaicfcq2T zbVDK9o6}a%bdZ*35v!(`M#0bWwmkZlH~6FMWqKPR%vTiNLR`Ts^=kDmY1;)+-=&7O zTj$PgPbj3Dye(DhKKfmlS8{ou6tuLixUTVW?1+X}e~IvakR2%visnqoGjOi7a;f5( zn`X&x){bQ(0gqNiW>Wk;=+%;7RfFF^ zzSIw(T<= z6yEjRgDc7K00z?+Gg4UKX|G5>2@lo%ZNaBs8N?kGof;~MK`pnokDIVkbSV?o(JSlA zxpN{(FzFjPk3r7f8R1o-=@m7qOKy+>4R#~*xtUESCc;U@ESW})mD+GPIWs%P#IbcMV5MVhSUO{= zS$C?&nbWl3HCvB6(+t8s&znW33@WkV(7eqr)`4FSyyjLWg>g}d3d zHv6TY0x!Bpr`X1@o~X$ysdh^*d#uy7CLL$cs+)He`w5zG-4)S!D1O%gwisSLA8`e4 zX=T>E1;5$(j^~=Gy1K+*R*F;MX&P&m-xD)7{GN14QtyU)jfz~Ozw$2^B3&Slu9MU0 zw95Q?#@cDQScNP3A=Caobc54Vj$CE#uI85|H$(~D)5BG|FA`}*KD*V}8gHb-ZR0|y z|2`V%?xyi~4MBe=*RSF3==AD6=p)CmfwY2^`q!<*N1%8Q@^3jZ{u8r)UY!;f>(z>2 zfJb=wPg-7H?MS6$`Ssuc9;Umn$_jivtOvR}jW~a&?b%wpPeKwr)|0J1*w{R*%*=n+ z6XR)rk6@!4lCL$H>Cmzbb+*QsDHI5`)k8zEUwd4{(f-W;A;4<^@>MG0IT5(Vt0_&O zO~xY;seY-@>(@4c+c+HJ&#m$=y#J-)Bd2*8K&rlVD_TgWjkSI&9{5%)Si5;t4LpqaPULLz4w4Id$6fh!b~E1BaOZotxz z8hBGYgcA9BW1t`$H~j-KSylloI_TO1NRP@#zSRgJ2q*}PnHt(b4(V9D{9tr#2~WBe zTetgt>oN`Aon%o7L3={F6d@o;*8PIXWW+=I9k=bNcU;Wz)cmfGJjioippGWN_cm6Z zKtGqBgR8p-fNqx!M7zh+?tx)kRWqm)f_g# z(5!&z^yiE-mtQ8JwZpibDzKt~em}_ebiLcv<<^JKilW-bH~_PzKfJuzYm&38K)(`A z*E<|dcUy?v(3PT?zOl5++t`rQn_uX$WJjNDZCi{h76~mi>?CLR%48tm<%P(I`Px5v zO_}aVQuh@$eH{2Y?Bh~~?{L(A;5CL3?X})}hVvRJya(m@m_qTH=G7B%* z_j`Y3rN-}3)cj)G94wUD|5o_ud<#!`q*W_lc9*;KR(N|ti*Y>JQxqPN->i^~>-4f} zcf?C7@9pcT@hhWKpt)o_zo7S}Qqa+P&=Vdc=;GOTYl7_;W-LSE2tDaGy5EI}$8`zaV3+4U0FB?u>`b=X)uXeHg{j2NxP^m5(FOHgOQ=v=WM#2B~DY2WwD zH$WKvkUktO4_@h9DlhQEy)XHt=nwt$+lW#}WS5TZ~Y zCt;{fN_xbA`Mifo9(kWvjFMwVQG74fn0XE-A zXDH43PONiR^s(|ljb-+0wDM(6aM5G0`n-aU&;V>hO|8X;!$Upfl5rC#$u}j>TR)5C 
zNC~(ZUg+>Bb+_p=TU@Ik#*CAue>T=UZ6EEV;0~2_%k|zlA9pB=G|}~Zce1@E(i=$l zyMm>fIvo~ECjXe35dS!wt?*K%)xr*vO(nzj`dbul78Ie|%*WF%Z=JGt;kJ=Os#x^| z30IA-FhCNw8o1R+q6AQo2QS){1j(N5Ef}jiL$0YVoCS+J_S&5!7f8Go;Ju2@k#FmfM@woP)%wz4XCiBX zNQjFAw1vHud;Mfk_Ked=O}3L>of`viwD-K?F7vhWq%|d>q_$+Gb|vLSmHTC*5&f)$ z5Wqxr3+1q0_Nd$4WR+yS?1VzAR^n0Ko^j{R=_N;xd}eT%T+H@6u^@A~ShPa{5$+GP z^Me=7^H1u|mE=6eQmbM9Q-RMB`_D;~)}KwEs6>QZjODpLyP*oaM-0s&r*-dmAOOpO78!~CbmKo2 z^O-sh0zug!zFrrx>@(eJ6-TvB&$>4Y&t5N2od;BYHn1|-Cd{Y9(%IAH4SMVm?@{ut z!$e1|-wI3?`vtzqR;g*2$a_0@)qFq0ZPYs}>bnybIf#{7YBTV|eh(?w@7r&PJ zCIp_KQuz1k9V&NLyQ$wAo9j^@(g$sTriHt2mIg)XO06V%Yb!qM`Y(@CwvyE*nf;P< zZ9cs*LA-aOFW@a;^hwiGi_iXBr{)I3-QmxG-z@a2=f}Grk86C#@BCL|s8HnlOUj=$ zT~pg~YVEh~=At9-(1H%G5=T4U4z<_2x!L12xG3U zI(!XHBrdG`K&$t;-b=D=WTZoFNkLGbC`tGlJu61iOS z1G(LpF^56{IYIpqliu#*NsJr|>v27rZZ+e&9HZr=^_cy_%IG^i_i`ixAEo81i;-oU zw3Vt#mgCI|nHn^Q@#Tslj10=vr`{12BmjZ?O=$|u^DlYb@n_qZqZ0v1zaQu%3xB+- zVqy|gG5)9)dGk6pt;P3td)baQ-SWT%DRY;&nV2jOj-2ibl@rRL!oJ1GFNx3czr%j* zyOix)1>@|Zw_g9YTHxLvyxR?CInVaWLJ||!#rglA_y2P4J5-po8Nyzgj z(?6?J9-p4ryova(Py^_KTpAM|a)?(VQ#xtU3ba~sH?@Sa6WVn^&b9hg`@c!_5fBI* z-Rh_GorjtkhVLZpb|4|UqZ+>R6Sx`k0YEm7AgL>zmLjXr`M^AML6D%`=%StEsZbHX zpL4;sE9IIK5W?DiH*;;I=(+hYPWl6JpUXxg-ibTmb&S*bQ7$o-Q#SK%i^FI&3yk{(BL}0%Betpk96w)L) ze|qi1DQ?F_@f}EE#aTE!8>S)UggGY{}QXgPsXnjT&Bo>DH69oK`KK zQ=!E1Ita()D5f=~bgE5AB6YvE?9U-a(PzEEls;=6Z|lxilhh+^h*20-bJGu7R7Odv z`+`C>UNeRIevR`#9!%hh&NBlW?JjkZrUxL^r}e^>UzMtSxe=!0OKBtiLQR}vex8ae zHERu-OPM314#Zq{AnC)wkTDIHALwwBDy!8*JUviq&;5p2sLpg?oiKlYb<_lK%!l1- ze$~^V_nJ({XZ)9M;wP{`aa0gwy*A)TPRJ|ZRD;nttBQg-q5}uKJ za$X+`3qm@Ab%=aJ_0RVrfGu2V)rPVJ96`iwh20<;G&3y@@^)In94`KT%J_nUowiRm z*Jh+0eYhRxhQ(WO@h#jB;@PZCIXNrN;C)Mr2f9O5f|KabU<4xUU(_uoJxBn}Vx@o| zI=+AJ)PK&{$m`I*Y?~3+VTx1o93W2W;dkWJjQXcr`Y^cDvb5)WT<9lmDk=dgcHu3# z@^7P!xAt5W2fm$zRhBmlhdnGCEtxMWIUPPsOtJM9>gwgJo&@<39gc?ydB-=LOKzul z<=*KTDK^!h2EQcb*#eOWM9}+-J25%!KINdr?rs?HiyF;%QotNsn7m_b%8B7<>?mPP zHGg!UWs~mC@u5g=@XFnrBAOM)afUTS&{PL9%fph%Ff;kzG9l z3F02Ve4Uy76tsJ{2bV18rzeJo+>)Jknvq2*z;k6zAY%Xy8ed=19@8~KixX@tzFLJs+LR=LRl3n3AT5(|vZU311(YC9&dAuye@0_0p0oyvEM0+NcZz z4pe|>cfAc@IZTv6y{d*_|FnW6RsOk>qb?d?WQbdU9H>m%*`L~2>` zOM5a+@nRU|j!i!9|56YMQ3eD_%-K*%RCeV5Lrl=a7k!v=CMFbw{36s7<10#!O)X6k zn;`{3^{I!cw)1j(dxCT>CN@Vx-D1XOCSU^F!tYJs!2+~VX~Ybh@>fIj7&jc&5@N_u zmt_T+-H`n=0|+4MPrjZh!GU&wI{w z_+zeXE{0)d@3p^s?X}lhpN_3T?yt1*e07Oy zLD?y;(`;|v)yNUXYNGMEKueBjCP&8xhw$WsdO5>F|7b zs^3Cf%VI~;$1l+m0a~L`YY$jl`@@;I3IuqVR%JJfyl43BYg~QDT5m$^Cbg#V1t!;< zt1w}zD8THE-2;H`^9OEio6>`F0~%_-N6A+_+QyAkZ8zfZnjpf(z*&oqV^9jKhX zd<4gS5B!T~d$;W_kRon0&TsxH8iOKYvFyLVf0<-k6`n|GWB8`KM2&;M5zqajn+eM^ zAfgLRXdG}RE}@ILtDH2vjA;3pvO3|km}=CpT6~~aCnzBKW5T?!J}mWCtS1J4;K!!U zy~VF4G_eadJ{3`M-bTG%#&0d_1c5OoRgDCUdeh3TD_d7f2h`o1s91CMqh*Ve%+aQ& zmSG1S+iZ^AWak|fw?UztzhfR%`i#f&_oizRnOv=MV~aS=v@eU1qmf%q)mf~PxR6wD z;f)vRwCOb=Hk?(r9ULv9JV?#_3>@9&no@1sjp+b?t7a7=!DlQ*YGgX6BOQH31oGRK zY#CvCeYM@%pLH{auLD6DY;^7M(I|~cP9@!9c#d>!XnD%YRf{k?+U*cMx+p z)5CyP?&5!?{ZGulRiS61Wc4@z#osiRei_P_d==Bh4i%z&Z>dK!d6}Z0)*G2!PUdB1 zCHPVRV0e2MqYYS8DI+yZejDoi%3E!xpoClRJDx815TW(bWmbM8x~_!@c;F&Q8~>&D zU9vlky5jGz+N8`hPCuCVsa1Y(rT-QUmz(e}x#<9AX`VO)Wt`mDOjGCU3+AZc7# zDNk{PB8yKVsNeYdnV6WeL(b(0V0$wJG=wzaI|{C3uuO*dK(&dHl!zw;0LIt+48bGs zXFKDW)<^}BuJ>~44;g=A70+;@(jnSj1W`~x(hf^pr%t(E4+dn3k$A;MDY9`0P<~y5-`1PpIONcnkiPG!J(~Uau`uAcLn*2(9+;wY ztJzHsc#n7T=SAQATrN5}7lvk!}r8SCERVS?YmDtvc+ ztu^+yqT(pA9XA@Ip`K3y(I%ZMnTw&Q^NcS#F5H1O$Lrz0tW(5Khgxm_mTB(R2Oe*r z{;)ouP)gB%e+kYMd|Q89F_3VUN{ih#4S>QSkbSP?eRcO=%@dWqZjeTIMLSiM&J z+<^I>3=cm6Y}anRoOrMNqF`=vDW(Urk?@wj{~T;yS=r>Bdp?N}lv?XrQaW zueb?9jY0|v1zkNnl&n*9O?YMaPzz^p`}h0F#agcYc~^LqHQvKo9H6 
zV@rq8lJ9Bv1vw_7u_FvKd=MXja%OJ~ftj=yLP@o@44)sYL0WEi6;SIn857r1o9#_J^s$c+% zR1!esNB5kG(wANals%`wsA8c{Tt%p;-4hC5tPqW(;~UwUbTKz2lR9p=7U_&FHj;;a z!aB>=4Uf&bm=U1jVF;sbI47DahVC9oGhci;`qja&h}T^7!tyhEkcFj<+Ua-aTCx(3 zY7dv6Nha-(r5i54kz$|Q-W&KP(YPl5+iX|n=rsWW@}km{-RZ8pZt=$Mx!xLM?k;8R z_s@a`-boE0x6u2N@Q%1qZhz99AibHgWTi+^0UL(>SQ&+rvpu6E(1iS}c%KzyWMwsC zCw#m6d?Rt7VEY|T&cZF-|DNct`k3&FHJR(YB3;g!ZH-z_C4LEhrA7uiK|(;isZk3S z^qyK_jdlXnKRzruD>6gME9u<7nAY-9laYkX#UY=aj&14w5 zI#%$;4t;fJtSJ$Pd6gWqR+;)-_J&>qurDF_V0E}YrJIG8A&(w`t`;qgbpPx5b$4J! z+s=7>gZq!UkvUpwp@q82+-O(F^RLPjK;I{y`XLdAr|BK7l4Vz>OVbkX%Q{epTdO)C*#>J zgO=t8lj;%5R3#YZOL8TfyYXI*7}O^#ZBKNYi||Z)<;0{acILaL4zix;K1EemEt)9u z*Q3}>BEZ4GoB#jbZDLTrGFQ*gQa@}80UtWUEVpE(xpQ&jmG_ckiW8BnaID&*#7t$(LJ%I%0PtBlkZ zBq+KqHGN~()>slpP|L|um29PI_RZ}k3Le6O@WYCajKKMi7f96C-!GG^Y{`)ja_|6Q z4wfZL3I7i!7)@5|wzyQuAtEZS;Z0q3sY-ek@|LdII zp3i>YqN&H!oKu%@hmMX~QeV^v(wv*~xF&rb7^v|vz3$bs&2HFtMa<64Z4Oj@XDM?@ zC1!LcF0DQ<)K1?o_x$kH&ND$u?w(6GMaa;&YbY=Rw;WwV+;beUKvQIlFWI?s*(5|E z8h*30?Wg3=CPq3D!6hGIE>Pdq-TU06|CSt2%?7Ja58lpa(3aFu#UAK0;bAOIO+)=V zH9+Z`mu>v=F?OIfl;HWMsxULUcWf3{n(mUq8DQ@y?!P#AVjH9@i6}~Ot{E7p6lksA1?qi@mL0G152&p{t5%y5{VVvF4(AbD;vQ!=8PFw*4D|v&1?TsCr5QZ}aHI z3$Jqug}4D9l1B|DYekxV47Tb8zSxNl-UB$C)Zx~G-p6K8`E2c)%PJJp!i z`#E9fFY0kf&V--AXteku7D#j~i`SY~y}~TiB2!NP*Zm{o_eiw4O%xn*o&1Z@1aEtl z9PBS7__;_(F_&v4dgVFJ2jfM9gbT8mY$l#EH_d+Y!DF#}&oGs*(1Kb8>eMU*i(+#l zGi2V?U+*612xpOm;IEZPUNbEw2-ew{#Vxfe>T2g}8I0BC7eBM!JbwRTBQ-LDs1f)l z3u8^m0KAquIJnyYGm||lH4DeK)niR&svERX5y&i~7sy2u$UC&vg1jY@)k#J)(2{e1WC*#kPTwLD6nA z^+L_BDY2I?1uxo~y!ii|>xG_zy_DGE=k-~L^xEfJHJ`;PJ9p6s)&{7gDE^olv$HaJ z;XqRA+r?4ecH5*uZg@jZO2xTJseUWg3vI65wBS`26~ZJj$e^;Ctg07cFdu6qBv^Ob zE5Fvvxtv*&TS!(DO3lNC3%Q3aWtjhm%WTrN?r0IF`*(ohotS|#F|9@|N@}1*bbwyp z8vo^0P3T$K+FFi>XH#wE8xuE5rB4g;%qNY%9{%~z4?OfHtoMG@HTn!;F}}Kd*WD$P z)>LZ0RH^$W=yZEDp8>)jZeO~>Mm%ey_FI2_`f5Ebl+t;{VpbT29mUSwVUYJ z1*z7SGW&A+8e>dTD+`xuCYXf>y-)Hy&nRa~B!{C*edzSdx@jU*q!pu5o4n;+39_rI zR0UidPczg}FaV2MndBY0nD%sMe5${Xy4@13SN)%L%>Yos;17rI*atT$8*t%Ia+%~w z7cd{Z7vHLQ)-)mizd02H+>x17AK09M&lv?I4imQJ;dGD za#RmUn(V(bNBFGkic>n7ZeI|V^WZNG5K4MW-18KCMfjbDM!9ziW3=y6GuO74dbi?sP{jDb3`g?gve#bnY$-}DC% zpj1ty(Gnmk6riV~7Lq-;<}`m}8?L=;y!(42^Idl=X^kPN8roMrL2*%t`ClIro?GtA z;`g=>(c(pzN`>+^A(THyhv7K#;`me}s91;1nFqfykt*|j!6_PqQD-% z7QA@n{mU2*Y61Q$12MO#$P2Xmsdwd$WsVb;f@;>Q%hszDPmSL^A3XtBSh}O0e+v)- zJJ{UnK!D!0@1Kc+tJ1yxD+{rg^gkQaUrt+O$jVG*u}@)O@q7n9`oagk+Nv%0>*@e_ z`}AWvVphw}VLNIFkB9MLpo=u5!txps=Oe+R<~r2MN3#`gw=e7T0aTZMBiv zKoSoN6|esgAvO_TlS|N$tn7ecdh67E70;Cd5unB2^>B=^UM<(u!lF<$pF-%tt<8GF zm(v`e&W|_KAS!JpD#%?qx5{vT@q=Z+^KvY!IEvZkhcLYX&o@o+zf}g*p2gid?Fz1ygD9vQ%D`nw6H@1-oiP*koL#vRxF}*`~asd%}|p4G#OFBSn*iOWwnu zAIc3LJjYX#biHaOlslL$6VK|kny;7ulM_+BM;i24)7kUDEl>g!{n7K+9mKW$GNf$9@=PUg7wK(?@UV=4G+ z8ka5V<@B%+o}(chV-$e_s=1mzqy3ZvjP3l~_lgzoZUa+&*iAR~dfq5~!vkdE%e7y! zAr2B#=wiy0s(mqh=Q#{TNCUK~t17U6jv1S$#`2CbI63v%bq=JkUFU2soiS@pMAiIc z=9}#Cn=jlh1P%|g?uNlpOzk?*d|`+Q*e zMD)kIoH3NDz4>&PoLr;pOd;7kH&R50^A!a-3aG$Pf$j9@2M7f*wcv zkyhEJ8!CCK5k6!OTY6_FM|#$OH74TwsP@#ijhJMa((NZR49G@iAY(MSOlYcH9viGk z(gEH5y`0FGb@lw2g713rw+8>MWdKvm;qAFw*#C9#0GD0rs}vESC2efs)~Dx;O6kB( zo4C|Vr@vIN_jJuN6{lY~uy2!8?^#eAF^ltjU}c}EosQ}{?F))UjgzrG=4(uLPEKZ? 
zY`I2LBN{(px7p8`?dpK-fo>Q{wf5ERr-J?p?EvrlB4J}Ti^f1k^zfnncN3PCk0m#% zu;_0sIdMY^b!RFf^pfx1bl$R9MT;^a4d>ricj4^UIUZ-K=7;%^)mrV&rg7^Zu?;v!;6%#T(zOxM+fDeiJ#mzBRn!{I}Mpz?AJW=7T#7FxM#WtaZKf#4^6;Eh0iaeO7yP)-AoC6!T z%jjF6xGW0{Kf!GbNLVsX2@_~8Zg5;&{HKbE7RR$uw@`K0nJ?BvuPKi2nMuc=Jzju9 z{?!x5+9$Gq)_{dVpQ8-jOiz>{F?RKFe+=0o{T;1x&&y6Va~mTwqh0qyIs0F+Xcx8) zJOUSVi;r*)0eh#5r0UBHUZo)F_O3O`_Y-q*ZR^(44%ulB^qzW}^zt%t62n}m_~T`+ zTA50xEwAQPnhURlwh@LCDvjP&Q&y8rCxi%j;y@#Ptm3^BM0wB-m-Ss&6<%b-pHU8;-0#xY<^Q$WtqHy?r8vIEVYd!_@GW&u70}Wypl%>p zn!b*vxE)3JP-y|B<)!{FFFe?tdgF>f^Eu*=zXWEv-)1Y*agxY_9+S5+JnEDrOgv)* zEZS638Vfmi^k>))+Eay9SNoo!7EabB+|!Q@d4}<=`br)ukRSpoKD3Q0Mjb9T3Hk2) z28Gu+GY42^A&ZycOEW81q-XuRKCJ0?R6wZ`!Tdw4yNlM+4~&>?GdG*^VD>C+rKU=X zBbX#RVL84xUAPoTet%P=ueep%M@6_|pW;K}b23}YinoE7D7ZL!ZJz8Y3tIAt_~yrk z3-zY>;Ad)Q0#1gwvuahG{(P(wSil!*`M%cvx^EU`+C5Of%9eiPbz)q8;a19DCnjZ{ z$@ZeMP-!X*r)N`XNi4?~Jhji5S!K~!@E3A%9`}82tovfy+32}Eygpw>Xh!mdh$zy{rEjpSE`*||o`ek? zG$ZeE$d>F7WkV`ttFxH@_5ko9xWR7t?{OE5O-#(67)@MUV;nW-VCYMTk5_S)_*3=b z_L6gLY47tKX3*x9#`O&&u|YM}hyj?R7aP<0Dc?y-6QWr-h=-10M?=#y7-^g}kvDeP zI@5;GvP9)zA##2wZmsW09i3kGXlMq~uyM2Ft#|+l>*_ocKi3Td1tc{Sq;Q&Yw>|db@<)}v-2^WNF?9CCunHSj2}H@X}UMvAkd3QDLvRcRPpGzL%H`Fx(by2aD_u` zXLiVd`#H}Op{_C4R2JCFL=mGiPaMd~&hB<{6utfqa92A?Enlvds=`L;8n?nVe^ynn zr)kn#ux^nn`HVXyAQR_cY4oRLzO5c6pWlYLSgH(s(gFT5z7{nlAfqs&J|h`lK0f7U z%eG)`W{DOPGvt#0YY#D2371 zm-HTog;1oF=X{^^#@$XYEtuVOlWsE30Gk52Rg1Q?Z$`q#s zCLn{Ox=8+SY0pR1LbY{brK9D$FqMY9`_lbSQd#It@S%j%jU>J*wG351LD$%*axYcW zosp0H5oFVF$6uBCP50|VNR4P;W-DC=z9E*dh`4-5`%Nafu*;GP5`PiCcyT~Hb7Gq6 zQc_WsEOfZ}fsrO+Z=uB2;_s@SS+}E_p=OePgBn`eV=Lc;7cs_7+HbI$>plkhnb{cS zD_DWfWC6m03n101C-JQW@qOiNV}N<(Y>JP#yV?c!*4Z=e``e{)1JB!XeoE;w_B&pu zlD>Sh$XZG?#T?y@9mU+D|JsjyTG99R3fx25rx`fau_M}c zrk}5v5@d756g&-;>K-Y@4C*2}4GcLr=6_~$J2j9}c|zGmesH=U->Px|M`p}nR$uBM z0|{|k>!lfxzyY4RGQ`fb;u zyQ|)|WCB`RoC?|9J4?927WNpvC;gT3x!}z-QKO`~6R8z6keoPkmDtj`XlkuF-*l$6 zc~8$ucQYVrJYm@ChE$KJo7LPh`35r6lGID#EwYSHj}x0Wr+kaRBptBerK1!e;#$4@ z)KYnYMJ50F-)uw#P5RCy9_uOukgsy4sOKcH6L`k=Lof081kGEuEtX!ouEHRwVOrNC z^){}_=lpCrosnQTWVeEntblxy@d>Sw6d0ceU17s{^lJoUi)Wcz zpF|&r48;%@+(NC-!$P4B5dbKV$kMRiqfFzRLXQN->Cx^`Y@x43r1f!zt1DdFAZTf$ z(sMX~ASAoO6@v(tD6;xo%M;?|k0C;zCRl>c0?7t;Qz_r$*I@DhzeFA{V{<@7Vd4Oc z8I}w~<-s|E#R5p(5VTcOA3lAgQ&hTJ3E--EV+T#-0SEF2Sfhvl@RU>?6?lLg4iL1c zh(I||$zWkE4_?$6ugNrxa_K*fM0<<$2qwII;cXk1%PS0!V;%X^!yf;{NJ7gdv}lEX zM{|beAz|7-G}kab^hl$9O1GH*ZAibzNd+#@+=JkR_j-7|je=1E>`zH+pf}h_97XJZ zc}Sn0{1b@sG>MU-LP_hXe~J45yuZ+^uK`J0`(@yZdKLZ(JMAYn}^~|7JzCbGIG|3b?5}1zZoUV)s-fbTS!yqB$aXn;HKnR&~lx^k?Y{ zx%?A=diADr8Egu$Jdkp(&LQS&q}sjsMEal)wN`i;~rJ^rE8CEoS< ziQsICDQ~lSUF^K-z-C$S=#UT0+)d?W2%_CONO;v3jq*nyuTR1Wq>;+MOiz>-;pyow z>Q5GUdPr~;HWV4O^p2$<(?q+veZ2urZ|dkQZaNmU?mRP(ZG}xe%|fFvu4QZU41jke zjJ!fQ2CWmJj~u`z!f^@YO_m0Dl|I_V3#EMEx+zR-DtzqZK89k`_e;-{o%rHzkx0{H z`AA%T@{Gh=xpRcQNOtoeac&;h!}C6-%dtr9`e5H`Hyo3={z~&85euQQk@!N|v%=;? 
z_+E;v4rg~_iT%p&jW5_?e}eWnvDR#2Sc+95Jh?fsXPW<#xX7ko-73nXDesz+R*~fP&)@J!;A6s%oj{I+Mo6;VN5Qlu<+#L_}_Hn@iWn|smx(oZ}k&~icX#pc`N^dv`kOsYIKAfjTm?L-!<4U zqoz%yXMa??=Pi+vTdQqTqc9=1z9p&n`L(R3Z`S@z?i*h&i?CQSt*Zc=n^dEq+tcrk zBcNZc*(^a^4RvmMua7c}UxG#eRKp$(O|@g82%&`w3y-4IAR*&3-{EdSflElYZ^|1+ z4a{Yv3g{rx0N}O!6i7pV%3indtrCR`DZ68A2#7@*Yzcgx4^2g#g(CUyx2TIeU_d5- z0;&)^pmro|W7C|!nQl%8WS*b^)=0oSN-P00CrSy_6+nUj{w@Dr!Oo4ISSrCNl&1vL^dgB7+=ytq<=6Ek=eNu_Km#@PGnUH57{ z+(R;j+Q67ZXqKPZKlrZK+4Sg-mxl|lbFQntGv$6JHbMouw`)pm|5a&&i2VGP+Ojgw zGfWdHC`|9NnH7J5{I5R~T~vcUidYHN$!GWIIc7y~41TlTd$70|Sn?SeALB?2irOHh zOHm-e$)wv;TH@sDbOvVbwXzBCaI&iR?AJejSFqp-;p%W>EnuMCFnp}9YC_gVhnFG0 zq)1YU>u7-Gz1@+TO&L0hG-Y>S{+K7nqAelmzT9oeKtbDvwB>L{{7|LtNvXct-Xkv> zR`IaL_I47Zh|b3u!3%4l0YuV-*i7a0>6>v=q;sT)vzsD_#casS)|B}G0<_}bK{6t~`c29ouLC%R<%<)P zCV#&w+Mm7{Bd2Q1?y$9+zMpax)I7n~RhjTUP_^N&V;ezJ9pFvUp%A+c%g-t7?TMil ztV_-`uUcLQ z+ia^9g-du=l=J2{EfDnskeHj0kh<}E-98eEzgCIj^RzKAg z+}!-0AO951Ks=P^U-_#(<+7Pb$z7POHog5#cFB7qBLl4z%#F_hF$Q|E#bydm5>vX= zXZTMl$u$%lemp1;Wl%_a*&J!`{`q6}?AUBef-9SPgod=8HimEO>hAHSr?*g|^Pwt1 zu|{8ZWuIAd2;Ety4;2#3e@{0ha(#}EX#3PjGgmotxt7CX>Wf}2)Uq}ltr;?(ar%g$Q6O;t}`zm$gW=cjL@K!-#Iy#cgfT3MBOdB>{wX*q3=`6#PN$N_?7V3CZVMnAeky2g<(1T|%Ga6DAKTCX@@1^*=Zh zF#B6XfcGRZ4KB`of21f=aXGlrpXR)&#;0p?g1@TBcdGnP{nDRG|1Oa>D7;lbqa$M8 zm@0rxg7D1?q|YXp93;TEoAh4Kp>LvJdw+Fye-34nGHs&vC{?jmR85HeB+Pfz*8mG_eho^RA>NG%L9^Gt$Zx6LF|x}+r1HZKl1Fa zUB2D2vK+e#Hw^je)G^dU-UYw8BgE@28Y-%;jt;>Fd)lM2_t{EQ3507ZJ5^5yxw4@u za;`;+iM~cp@k}>Gx#^n9KXN{mku%h5t}55~{yRPT-&}xEe@1#70GJ4({uDW-k_b3+ z${|rgaQsh1xY8iv4Uu%dVS3u)5MGft`Xp&+^Uzs93{X-a(~MhHeS=wm91Br`=`V}M zP-91;?D_tXcJ2uEyq!InZC*3cJy(s(ophke=M4#evDsX*&ajN-jIse)g(bGwiKGpl zr*w>!HR8Xd-eKa zsg`I_F&7g^Ej_hFCCBdrH{wqv6%u}9iRn7`F*J+lIp1D@_mFOg60O9^MVlL;9kxup z9NCkN{U?*kUdLwyndx@P1fNv=!7{ey1yXd8YBWcG0+p&SzHjhkkP@JR{!f;vk9Z@I ze)=0#@)p%e?XTn=K!hQt;PJDq9I|EDf_r(6U;Y?rW5~7?usnJ45e{B5A-&SYk05~9 zieRL{U;9Ajw!D^3Og4Hr^g%m+w70TQapjR*1#%FU7W*S)yBJM^70RhI(J z^U+D_O(FHZNM%rK zK~V-tzv4=CF>TsuL;0`b`##Vte^8-+_4KMU_fi(k>O<+mmE5gLP25H^ z?kR`Y6oj9j2)KD^36-e>45!KaHvTkqNeuF|{k>>Of&#?nugOD1z_43x#+3AO({gPw ze>p4=II-S4J8MVU5aC~bRzk{xq6wW5v9<}pER z@oV_;l@Mvb|EmbgGSH7&v^AXHNw2=D*=U*Uayq(uJ?l2;)USSqoI~*c4`uoA2h+uo)wJMlGr(p8_eJ!tn z7RNMB77BC7OC)d56GU-FQv?rTYWl3hkY%VK#A085|M@V|H0l@B@fM5a3S07Gh?4(T z*u$q#xc>wIqx8ptt-xNq2b`f_bl(9}OZe7+0Pq{2Y{9Qle5B~7bkQlf{8Ar-0V&C4 zy7`L!XWyU6*>&`}XgWxK0l7nVh5A>3i>&wlIY;9lX&JK4j zcD`ryjMhR2FgbqHY*P`W$SpEn)*{%#OJj79{GW4Yh@2PE=thhS5Hc5cs=|7^XP&T> z*-SMBld6kZ9E44airtc{rG3E;?WW|@|CQ%)t`^mSO0j_E>=GT_B2#^k{X^ImW533* zchN@IUyahZhcPKhkl)oRzWh4l_}30ei2mdfZTT=arJq3BoAaBr*DQ|Kp?if{Q z`R^tDNTf~G6|A+(8disg%#4%62GuO2*wRHmQGE%=NDm3_`5i{VW){3<=>1huH^F=L zb!=uhhGDG3-qA$q63%Q{IAS#q0zyX2#jCNMD2c9n%}y?Qhl6&du$pQrlv^ za9;42EEa2;kvE=RHJvAygBE|i{`~J1zSY-tQ<>*BGl1V%^w5e)H)rZcTN+McO|`R=E5T9dfz6@I zbMZ4&%cDM$GhL60I1KLV-WpjN$|K0nvw3k8xsyI2Ek({`_+r-O5}MR&RLGddg53!o6#!5@Nz|$;i!|Iva!rpvS-B-&A-v9k75?9@?A3 z=yuq2YJ49!Z&pO+blrV3l+jDKShmbPQ*zUHCvdj^LQic*cL8P8iIjq;Fs`j{jEc9p zSm!KCeRXTbOR>IFi0L4V0n-DaCZy%XrhU}!Yg6|nekrT6Iv|aM2C<_j!QE-BM(dhU zb+K1&ub;}q75Iw&a}9aLO7XP?8M&6#_Xe*MoXsmlRb}O$wn+WEgj^^2t zyRX^nuBs|I@m`4$Ta1|9H`%|E&4d*oU@_Oi_C@M4l{GB8i*59C_vrFz(OAzu-PkE? zc6G2~>5xuy+Pmm0D>^K`FzCed5TnmwU}uFC4Tew3>1wOe5@JM+j)g=YdXAn+8joI_ z5QomG`qCa$>;%aHk)Dpj!{#k z1hCa(OQz1k-QDd@7GhvxYL^*5gvllIF=~{7 z%uXsMCfn`NERZ)1LWYDrPtDiwX4&CzI65XKa@vS_NN6affcw#EgfKM?jT-sP$OtP7 zi~U%(tVB53@x?_dn@I@BE&>tS8v}`U3w0_kLu!Q+>0+T|L3gCKs?YO;XWbx&QeaKGNf*7k1J$ECptbT`MV=jWk#c)MHLrhNZt#e>wr^{~B?#++WrI$uQVY8|#EwXk*hK-}*fW>^-w}m1(L? 
zmP45|-6Q_Qi&f~rA=khB4XN-@MV7Fwa1IlW2zO6?6FptTilZz;0*>{DN!VqWS2Iqx z(GCf$Dxq2bjHvAGb6#UQHs@|U0Wp?Z;L#Sz?iKkcR@9j>?=Lq}~HD)@T zXZu~)W!_{L3y@^z*v?#K=yEZrE&06P?7aDM#2$>W6o2B%xK^b|3QL*HSiW0C6BSgt z-U!pGW;0VPwn@cJdfyKWtJk?Ck9UUz;x&Qq$7JO=U%he`1!Q1&Yvyd0+pEUR;B>qG zSSpCAO_QVh`g&9TUM7--vue`bE#5PnkE>(oM)I=93v9P(p|vC=Sojv?F;z3D)IYUO z)WxVib2TeJMC(F7Ay#6J;3AJN>CyUN=rIda$jWT3Cz=2?=S79rp4^c_mo{rZ>TByd zi^1968l0b(dHL9iqq3WaBvJicyV#-U@58{-V2^m^bk%0Lg`m3AKR4dpUR#`Z!aGJ5`*Rn0T=@^3mY?_E?VE!ViblP6z~Yetup|@(U!jth5HGEZNSK ziDfB*D3`&dB|p#>Q@!EA0Rbp5uZx4MtSrd8ckY^xUrioUci2`-PLjGyxV*l@-5=qQuqLUASBuYKxL6!o`AP@uuzUR` zOunw@>7ER8MWLBI8}115)4Gv12p&&cO^Lx1*yWpb>J3@T_9gfEBL7w>acn*3r9kh_ z*j!g%N@9E=DnNiLJIj~$>iJ3v09^dKO+(X1sNVY2b8yKu-ud}u3F(Qk341P0BvO$r z@8y1tieuPibv^%hsJfm9$QjIr4Ojr$v6b7SKVKd%cVIjmx|dTLDUdYkYq(BK7bI{s zUsUk9FdS$;HCF!pT_?`4Vqt)cW8o{(_9J9Vg|r}ei#zetB2H?(<@ zLZ8&PLrm(RpsKBm*cQ=HXo_ZKWiPytV9FyVT+;P5EzAFrQ>12E$ZM@Dx6?N^pw0E- zIbHnvtj1h}|L^gg0@1G(O>%``GmYabq2y~Jy{zvq{K)X11z5eYON0;E=Y`EDcsi+! zNIopnoJwU9auH$(wwlOKtcK(p5pLQZoSRrg+{wJ_W;;DNO~B#ze5CY}w%fzwnPgas zQoY>9yGBxiUx0qvmpgX8RJ+ZwfyV$wQ-q!ol9=VCMX;-%n7zAmX-j#2&}-R3&d1+D zNS;XtK6jiV9e#cqUGvyF`5eO2v1|xcH9HmZ6OTOrg}E8`dG2H2^wQ39g>~YZ0Jg5He}k=>LM+jJYlOxZPYjP((~+ zNZ<$yH-jp{hB=lc9jA~XehXUDgfntW8ADK;gNw`lbSa2QItHgEmHz-#B9M=rKj~hS z2kHaA0d~-QahBw~4xWf>>Eu0iHj94ql-O8+Cp$9Y{FanXtOQyREIQWgaxUJX-0B zpgbJ@(f0Y#3y{ejLB^{%6aDQQNbE+2{tCn*t?)52G<^9u81z17%FUL}*8fRPvSB^5 zo~e2Sh|@B4OGynqiV6Bm3=EgZ<5ghhgLP{^(Z=l_Zn51?9xy?v85va4mCoY=nkvIM zpGHYqCFA=Fs-6%QB(;QrT1A13x>1N2-P_+Kc#V#cj<3S@bl?`7baZu{Y)>Wc_JL!_0H>>%c6V8 z`$y|3X*Qc{MYUO9(Z-ru=45SY{0w?_X$~XKbE>5Z|4<5e8GA_IuDk<&t%Zh+7dM)h zuQGa`RiVR%$)jyzsp1X1K$pSV_{cVO*WHEcF!7oT)ej+%<@wHu1pK9%7?cLST|0Bj ziT4j%S_aCm&E1-Kgy57_H;0&i1f<(9Jg{_|<)kZOp0jv%W#otC^?{`&`%CaH_u>(F zQQ53FRPA0M(UNgLN&Js35lFuLw$c`4rRR5Z+@-6l8zT#9f>x!a%EiHgA7VWY+yjf% zOVdsIVuNrfCr!WRZ~x@99RFGCvVA(<-mMJUlFNlob^n4;LHr zR;@Nu6B5uxvK04cE9sv;O|x5S^6>D;kxPaibr9+ik&+_u_}J!4~C(*s@mPNT) z$se<Q=^CK0;uoSsAQMrE@ryj(-$TQ%47eA6PT= z_y0msc9irLH~DHNsKm_f6%%M=G-@e&Pa!w|I}`^`xy4K8bDTfNjcDs<74kawC84-; znWzm@$Fj0oX;O)#J{0}kU4z0Wiw#tMjZ4A=n{hBRQ=8&X*bwuq7_*bkgfDt{eQBP~ zG_n)*@$##oxqQwA7-2ENv}mKx9pbHmh`p}r=LJu1E#rSiuV^HYK*h-LNSz3%(jRvg zb=?tv47;Dh=qjRgYDP9{+zH8L%&Nr zR^~bbns+V0r@40jsTY99R@g3F^)WX)ySZ_LI7Jqg<>M4z`MiNwp&f?6Y;W)Elr~fK zRk-G_U%x<-GzA3(2)@36!%6s@e~M=)9S05xUH5l%n1CAwHa0eZiG@{ISQr`_3fbD- z&3KN<7}W!c3HSrp^CV#qGF6z5h(5(NM|+5leg(=}a&j^b8BdADST>VR-Mg3TIGmG# ziIsLfK0ej93!Vu0I>^c{(yny`^#{~8QbEs(!a_#a@bG(^I1n!lfNUzDprF7~044Zs zdY|o|uL>S8(O4oksD!%d{lI-uZ+^I|3(PHMU@rZHlF{8qZ)tA3zW6?UkS^8=(?%z` z{|BUP#R+gp4%fy*Vi!tN?X5hNnzd2qScpKgS25UZCd1RvC7A6$IT045_bl-+c-G7y zP3I*|%e*HAZuuUyn)eQ@7HX%?rHzxpg5)z{Zr`=(2Xv2m}qR(>2+R&)CK zuBLad)g4^nnHoeO<%iq?R-LC8+sw0SX~0**-4T_2x<1Nc_>ILaQ#wT|&Z`Nj@W!z6 znROjJ+8{1s#LzLQhlZ;8u@ zVs{m3RX2@M`#xKIY_Hz>fth6)LYEs2Nn4payYg5Ba5!PDc=y%bDhQ{b_8)}d+6>6< zs71CqLfahHHWqhl<*c>J|5M%B-AweK@_+`aL~c7SZthfeGdb{32kt-lT)E15y;IDl zaiF-wr2P58)?aS%2h}JxHWq}+&oXn^RCg|NPLsh8h+t2IS`u<-NGO-@es_ZP4Mj4@IWB4Xm;j^DNm zbq=-jvWZ+K#>Up*J_b5Lpwo1Ga+1$S49vc&Dlcb;fy%N;-CtI~^+dO)yZa~H{x7?* zQ;Ng6s@R=A*~F4gnQQWFOUZ1n@u}x(+3ociZ3szQ1JhTFZV(U21HbU;g zWmTFs6p%dyM6$k3YCldyY1W{TPvN1KHnf_EG>C8g!jNsXNuAAq7`)^1XkCg*zkS`w zR__YY_j2sCUB=~wJy5Bb3$3B+2ACpVMDX8q+}ZnHy?Iv67!e?P?PGCP$obxl(aZ$bwMydVHF7#glmAU~43U>)RPd0*1{y`Kf(ydQ&Sr?KBu^a{aG9+&nphj^Qpfz9sPiWObC1z|9r~Y&pT(DWcv0Zw zSm>n+UiULkS5iftot_3_lUo`aXI_7waGNhRl5lWv(A7<;udfF!!PC>zz+k2O>x(#0 z2+T*)LkXEx+2LT|V4%Mrn4X?y(5VCI#@oR8%y@YG7!{&Dr_pp#BgHdsNxXb+UX(Oq>G^ zdZ{DpuP^OY=vDmeqV0`X`tK|LPnCrkg*qPztCt=$CMDip>VU0a^+0gFdgtdZ69_7h 
z@v8ev@uKVI>Tiz=*KQu#%=4Qwt$T2A#8NLw6|*+o z&}~ebzK>Zmo0`|j*qlu-gjJgJo;&yT+I&%2H6>W#4&^zkevkw$eTPJffhXjMUerYh z6vQ{>KFX(iA_)l>Pg3h|=^u}N{n}ZZ189ngN5T31?J;NDYnMV!idipxQZFtgL$xW^XsI?aTdEShySzu(I;P|5l3`Ju~%x$a)K)xVolWcnCp)C&8T%T!T9# z1a}==g1fs02m~j%OK=<9g9i!jHn_v!?%b2-`QHEi?)^_u6g9=b%$(i3yH~HZb}n!Z zTr*>DX&Y?2?>!av&261?&H5I$;!*fN+w|W#ox0!?_J8Ix{mmyUtqZFrA~38%oy?bRU^ zNK?R-lam7i3Yecie`2Hbr1LtVprVGL5xgNI6ZX2Yjud}OK(M#JfA@zbYYqxM2AZY#-^r7Qo+ABA9y;D<;>;_*TVYyB@?8#U@%m2T3T9i@(w7}Yy77eH8nM`LZiaM zJRPLaIz+pn?#>;d@wzX{E3Htp|wKQ8?? z$fd0@|0_;ZADPdaMd0F}mC`voX{7N@`c^hbm6G9g=x4TkL~2~>hyvAj<;NEI{YNy$ zv>FY}wup5jTI?}6l3V`%;H?H%ac>ZciKY!wDNDd_{^#HLoJ6Z8H`X>(dd_E*KXSZ? zz=c<&GpU=qtVgiOiZv+b)rWu^2WjPDN?L7BTjP95L3i`niNFP6{_FRN1fDm}#Ve@` z0#c@~Tt=(5X1X6FTvQ~y;O^hkfe}RLJYy+BB3r{fJUSifI%bBys^y!QFj!fZb=QG)X)jn-euLi_)`Q1;*c7qU6)bv|0W?TG|F z^yxMp01b5F%z*s5=j#4Z;nlJwy2?00H?^Y(4&m8X)D()iY3(&OV}gAKGmy%hKkte$%Cp-vMvrFcY9 zv(N5WPC|vKsObGA3|uG0h^yc1T+9Uy{pSw(f6I?7$}38xVT8_c^$+@-K?DgM|qZZeIEa(!XqYbf=&{RPf`K7xPN4lQ*eADS&6 zN@F{H3$l!=iEd${Y)xbFG)jnI490lM3(=g<5PrVN-Q{d%r+1jU z-NeMJzf^)F=i~kl7eFvBWh;?dOpKnd!E&LzAJ?IQG*sWI3zzH~wqDH4kS;VB;DLTvwRJ1;UPv0h;*6A>VVFbc(cXudm12ToyX@z8nY zzolJwuniJRWmN)+JolDYPF=x2BGm~y(CTQMQz>#;9oY0FifwJq-mjqjO&9G_B`bFG zKaQ+PK&AuH|Fl!&dM$s>;?RQfJy?E5}3y!(taEog7fhdq`E}Wp z?ettiurE=NHD}mvN^Rkk?9LXf@L^ReNOmXVAcHrquKJDad%!A|Q`+$OVI|?^0HTGI zTX7#1tm=+_$2z9J{2TQ1`*z`>E-n(jx_BuS z{Us~SW(V_5F5_}IwrQi9jXlwaEG8byXHx{^f4gFBJyBMdP$vk_#d$fttj{gb=%)?+ z*1Z;m&t5+2A8liOxaFw1y-~a3CSXIgnLkxvD};_N6CR%Ln4fB8Y-wThI{`Njg$gG} zC&6Tr%M~k0dW)5#zLE{6NJe=gY*~6>^1btLC|5|IZ=+i(bgwArMUzKOuWkmpU)}`Z zI^>4=aBhg%ei|a)sQ(6+O?+6`aw&!hjW4xCI~lx`ce@IQ?!L;)Tg~ctmI0mB9sX{s zkN(PQA!}o~F(E5o!x`N@1H=L9`F4_=61k8f9G3;)G#CmhzGLO#s1l7VknDk=X(nyI zslni_{T68DD)&X&WS_+F_b$k{MM#nRP|fviSs~8sRI|s%O!-hs{Fr`&p!!$1>?&s% zU-1tdR24Fx6a0z~j~+e5c@nxjDdn5mb>?<~>B;VElUm~s>v@}>qZC4VM;>aYQc>u^ z3Pzio{P`B}%xGFEFJ!x;5i$09J2#3@`>NGM&YrBs1w*bDSR>uml|qfemjmU*k`#gj z7@vI@b|w;5$A^U;zTBY=dYz^swvlmu=)d9LHKyyBn$v2HXU{nmMZ6!%63iVh-z zj)nRk`Zb9A2dE)&`z(y|K!iR1CYM_+8c2F6z{C1T(U1az;NQ;st&INQ6rn&B4+EH0Kj_Bi#yU6Am2ukW05RcKO)HJ(9_}=Xn6@hX52-#AXJSJkd`FGURS2MW8 zi1gohxY-%#Wb(?YMXSEMaMV<-vlEJ0bjC$Ym30xK9<&h%$bGq+iS{F^&Avv0=f6p2 zJNwnu=<}6^TMVW+F(=}BInZn|P^Djg$g!-88P=8jyIY&(so09jBE{=m@2w9SNTj<9 zvc$s=icVRI>kCcJ`Gc`;Glk?{E2tr|y5 z?vYm=Mw;d(pww?_rJ+9B#i~EchIZlo^cEi#HB{TwrJZI+jJ!BDt3evbzl8Vq&{-R5 zSh9u^7dUN?^65~eu+?Aju@3|^?*7D-aBr|6jKA~OPnuNS1gDcY_{^rF zzIqeITt2v zro|dGI`gqL+wtWiE(u}-2$Z04_WtNM0B1WsLOTPz+^QpvA5Tt_( z?W~RSqP^)r;#32JvZblcrf4=Kj4!|Z*wh`p6YMcOXQTW?D!S5{M6dGD_s5+yBiL_- zXLkVhGCOBy23Di=Vy|v8C*Mwx*4o!(nZ~b%-lTj`80I`M(dTxo7YS+T+NtYdl4*LK z@=pE6_Z|Z989na**GWJz&*V1ge@kKy1G&L{5%FZZFd;&XA|x=R%9chBNVv&Ehp z%q>HxS%N+R1VjT)Xc!ze&K5XovDqd`^SoWV%6!1ca6pDeUniL+DG83p#k%3SvWiLD z4f+1Q0ue*L6%h>pn)SCgI6(Pts&uXrl}k4DBPoZa!v{+SF)qVO2l~;|eP~apsj8EDjn(e^VTEyLMiBr#lp+uqr(JoDZ9Rn@2+_*{qLs`F4~ z(k{!>u;r;66{e*>_9y~$w@+~zCrlZHsWl#SRaFRQJK6+lQXfsF5;AzHN#&yky1HT; z&WO=JNeUW}t4tq`(*m5x3X%V@e)zlVJs^D;x>xoK0PgcReOw~5{>8i-%vH@ayH7rK zvdU_-qMZ<|L#3dTyLf;mW2Jp)2Lb{!7c5(2Wp7Xn# z8$G>SO#7<8#j#DqIj4BbyB2&)5cC(9)7mi=o+-%spBWc0KA4+JK7Zd8|AP0gY^$V% zj-=zP%x2hQAmCtKD=CUhHP+T|ec^Vi;Xk`w14o`DJ3fM!aa<$4VME`umvTnYA#1!1nU(I2$AB**wXBr# zR8|_iZ+on+LzQ&KquEFbbfr&8KNj2fi&w;mQ{w)$wyU#M3x67B4ZMW2OM|f(9ru;p zN;9S2Z}ZW9$V$4yV+*&%uCZNNA-!Z+bG6T`-40;%Vy8MqN$xH-s4Y{lXnQ@}m|3nP zJ3m3gjmenyCVpg#gNWPYcg&2OzyXsSezBta^}&-x8G zlO=v{kGhwLM)4z3g$rJT@vBd3vOZ;*w=k6P`LUTq`4v*5N6fDeeXoRvx#~t0`g2pf z{4Z?$cTEGzMhwlA3ur}QnNEY%_}OsR#ni}>Crb){TVqkSdSH<%J#)i@**#kKtdCdm6(|IF8lql$5r^ zdX1)g51}4SoJmV+#qm1KcpyOZZ@)V!{STrS^b5W0>ko>&9u%WKYMTSO8?;j8?`)GQ 
z6ViH#WSbfbn)V6b5U4KP>t@ww4xxgBAt?p=e02=fzY8`%&nbht5qNU<6nY3C6Whn& zYx_~;7JDJiQvaQ)U(Y9_GcBlX#11@S2-*E!Pi=OETtls{uCo~eX-7}zX87kyw~fS{ z8;c%$RXDnwr%4zG3b;;!_dY!PNp<3H&1{;KQ2HYt+q7zKjcO!jqswFz-CaqV_f_C= z>pXH0|Iudc6mE?147v|>6H_D7zZ>)D+G z2bAkcTx#e~i2Ix4GCB))h4q`>AdBxqkl%J6i62gJXXVd>%`-+vsma@R+syq8FQ(n- zx0-}$+mF5SE7p<48v&X2R_RWcZpiSOU3To?%<;H}JMYUv43@nkE)(cz_=4rnZf0xY zjK1WS`dfqk{sIZnZVuVYLSVXb*|A3nA$u~}Hv9%|Pto^lzDdg)NA1E!^WQ&)L$8oy zk2~xONdC}m3bDp_Sd$d-%t{ID!w`O5FIMz*Xv%Kp+2(%kX8h6#s({CaK_Y>s^{Y>r z_vAk24GE1n@-1fW|6>qzme_A>vf}%A8^+1+&4167!Vk&ID`L*gf8SGQvvQaZXZ}s6 zq_9xmW$>`F6gV)?c5|uB1u75q)yat&EVDi!YW8Y4-Ua;XiGZZKjf1mUYrb^4p#OD& zd04xC)?AgqqN41GF=-H}P0#s@Qh=vo@uQp3be5odiOniPtq8 z9Ujv)XM2bZM#m4}&CH^Sm&{;csPFzGXo5)YkfQxryn_C0d(68n!U9uP-;)j5I?O}iZ<_v%OXkgnx^YqM~V+)$ECq?N3AUl$3iuV)6B@= z)Qer$*x;%ULNvd5XZs<290LgkZX!UfaEYeobg_@oBtE=Sg^l-QC?H zC8s@Xge;?(33dQYf9o1ueoDGp^z(V4_14_*7dWfj_lMUtajQoE1+l=$B!V>}8nFb^ z)7xMue?RWV%A=tDx?)=-(l+jP+e6U|1RaL{w))k9R+tv(G0$7tH(r9 zBkB>1r}NKJ4moi$-CY5{)_;0~0IcC2gZSw^gQ)(BA~}|^kUW^ym;(Hqem&e<)X!hd zM&}sON+p_Z;^2z74bA{#oQP&-57Gh8?ru zLLu2H>dbR0bLhnQpAjBG-3M74@Jz?@LJ;NX$PdNGomn-@seUyYQ zDa$z?=k*V$v+*Da#3TYTJ!`XVU0V z_zuyd%a%Xv!Zu3-ij-v*b+0I%|9_5jPYLTAq>TDJ7E?hi!h}L!yX|YLCmL+E3eZK4 zZ#<^K;WG!qr0UA)ZkruZLw$Rok*ufO=hYwWwznUlf;XdY$s&^6iOa(I@6- z?-_1%Vja}8nm~Cn0WZf2X7*P%do5XRnZ&%VcR4=VF6j>?`IFQN_nsT|l!%DEp|6yr z^QCjg55dF#77sBC2HA#i9)@}*t!A21IYZ+;WBs5nn$v=pW!7?Y6J-LYEtzNS$qN&` z8vZ}S7CJyLCl<@z6vQQDfD~W+Z=qO_ejDnb+$?ofg=;!fWW#0anOMk+huoLXU98tga*3WcM zf(FDXP32RjhWom^vcDj4iFV9Z5E7V@5B%x(`BRCTA^h-VA%sMWUwc>0R<`tR=p84h zUlnc2pHT4++^Wf+?UKflcq_lcUsnlV$WeJ+S(~?}s`%}EFL-#FDY%3VG{)+X_zJ$d zXvQSfxLJ05ecTl_69+F1yc_&oRDel3Wgjy2+2+uiPDEb3TDjrTN=}b)M%8#`8U^GH z`N{>--4G;Cf=9z9>{mk(X$eDI73n>TlxNmEem>a3cMLIYEoEFmm4v+s0 z=xgx5!aeo2NDp_r->dZrcDe74KZ`|qRe_})AHXWYzP3h|(p{^hk@PF3c{FN1r7o8h z?BHiUl?{$e=)0sOxinVA`@4G+cKYjLJ~a@qiIsUXy$-Lp)@e4`P(FnEuZ(P&nzg}t z{*_j33QOjzT7eSwEp~q>m2!Zh&t(A?c(R5zfoP(h;uXZ~frZB1G6*YVdEH$7iX4M0g2T01X@L&bMOO)q@H&7 z9`lDo5DpheQz+EUvCEEsT~adX?NWmK3070^D^fRZPaF8nJo7_H^xep>sy-JhYTb{o*JB&f z!nqyjq89`9bT+tKzMm-3#<#GoW=@!0*}=qU|6gcDkgT%vaLtStJMz!QD&49PQBco8Mf_KX#<31ZX#_1s1C)^T#1|xe z*t{38Wu2I6t2;~ywp#5lt?RT=4|ra^V&AwYvS?WEulJl~H#?or-`)?n;0Mt}g(8QA zB6rE>&TP~ZCyx!b?;V{WF{=1fG&3Ssli^t!M5PzqkJkxq?H|VwI-Gy|hQi@?x=LBq z7ADdzt9t||IK5Hk-K(L`{L8OqmuVyg>&)(>%j$nvqW@E))!$;KRob|%7KdZ{3EGbr z<{yWNWhF_ucu%qZ%WTTS{jVskSY8xW#z&o=PEGBoJHW`&rm%vJj`ru>D4!v@3`@UR z(+Fgwqy!o%O?gDc5CO&5{i2*1z3oOe5y2Af<*x9`9N(w2ricF4C*(dgu^pH8-BIsm zO(fz)trEOJ&hgYEJa}Z1a=FEYVWqO>xsT~aO?;K>q;@sn>%n%Qmt(i9ZcB<)gG}1h zp?RT<)LP*jHN59S64^qHmm3~6%at!1$o>+P{f?bs_E=!1q8Ji*c3CPO2#jlzzCIqT z{H9`M;N{!Tt(PaO_d1Qf@3rDA$kN;;B1lth+meclbIMu?$^9W?98uMT{@jE}f*bTz zN0mAxC3L)IPhc_~MiGrY;zQ&7!7DX;_Qd+&P);%K?*~sH1JMmv#dk@+3589z#Berz znAnPk{lvHiDG+$w%Co`Pzo^Ot7CHsDHRYbCrBeXlUN}I!{I@wp_J}(;I8dcm`}h$M zY5&u&B70QW=+&I)DgY|c{T~C3w-*e#x+BlE-fFgg@oV70fbs86Z09yh89)*3gepB9 z%yq>TY&o90NS`HQ+=N@d?X4cGEpnOVQqb%HxIBUbaUpS!35rFDsNGRLcp@b zmGNLMbQhx1XSZPUwDV7&*_yGbHn5o*{M5))yAej`)!>1k=MzKKVDESZIINkB97Xpc zmGTAyPT_)%JN*^D`kRhNIbAj95|0s7b^MgyCY(0!!bCA0lElk+FGb)H8_CWj+sseS zKwKxsjJ-`#!q3#d1W5Wge7z47?H6vw+Zow-2y~0gpq=dv;kC%>$9MZ5yXS7oQHiPB zJva1pQjzP2ACPMCjI&ztweufp+TOjRml7%(ENh~Sc>0sbPH%V++5c|0mK=>CEK(-X zjri_1qO4~8mR?RuT=jcp(3)i|hRt*9Xm4@2E1!M~ek|?RZ*dB!D^c4ND(!O)4R^e= z?jc1(X?3Tr*dRv*B9YNHhf!BV;$R$;zQs3*0o~gfOXq$*+$@CGetqdIh<%%M=ox%h zM;gEU4gy%wRl4bBKLU}7SRK(VoUvc)v774T>W3g!5Ps`Qdc^d=SL*ltWd>p`p7a5k z;@RZCo8nn7z%M}~6Iz^^aRnH$U#c`(#upa&goU4*mH;DB6H7}Jas~z%z(oNVOl`i8 z&7YN9$oeoy1#b4IOO=aN@72k%E{>M&M}?ou2I&9__1(>lPOE2KluS-xA-DZ%2S6og 
z7hln9^%QhJS#1(49hjfjq{VL2WB7Lycy=xO8krjxmk?S-3eJrUmC1aczb%thPsrQY z%%e#Fb5LV*Fx2uC&n$B_Io8Wcb*Lh&%vOk&w^gT(;Gm&dn>d`*-XIHy!@lHi^|dYy zd)z$x89{UX)aYyJSC7qjF(3NO2+JgCHa0NlLIR=s+$OiHUoy8e{hZ}AM8~W(jV${W z;&QM&3SAoRQjNwjd%rA9?0x1Qh2lbGHRZK6uE)H1z_7uU;oX6hJ>x`#!!yisKlc#& z`XH~EXnR5M!Soj5q`*+|hYxMJvZcmyWgBo3!0x6w);hXfOpbOd(<4&s4OnqJKP&ce z2W7{ERy&bC3VU6X%>KUZgM09r|H}Fzs5Jylsq^a#dghmIrGP=S>wH#jdEJ6;`Sg)t z;lj+cVYwCq<7k85Qj7JEZ2jL+wL$Gv29CYtua z7ttB5j9hHVqsm<148FGW!FN>y6Uq~utWS}rr&TW)6dQKZ74Ft=@)w}KFF)8kltl|; zHo*FIxcQ3v?YA^e-ij-v@RJL41WT1b=^5A?NaXmbIYH)k5%;UtYkEneSjx-e?p3Rs z%^UM3y-#|rYX9K^K$kc^AwRms*icagWksijr$zG{D*n+P6myYVD(ufc)jcE-_s|Z1 zifTArF=APK=$Xy5Yde`spPhR-?fKKt&4L*Lw0NOb;m4>e;C42J@QP8*>hjiw4Dwho zB?1o$h~28tz7X8V*rUW_l8)g7XUXZchF;%o7cw{esnDqTf^#|ZFdwHEF~hm}zFhP8 z=}4O*NoFs8_h6r9CGByc?558YqmE*#oTRHOe#Lbwjss+1#Ri=6%v`lu_dCU|kCwj1 zt*xDM8+yA4ougE{ORY;S9u;o=9Z6j}qH*2b(Ej}uV3y;Omgu*BadkdP!-y{>$si-| zec{RwnUr`}EJ^J7qI^8Yg%@LEBM_e39_U2l( zYePqRTu<$4)(X~O?E3sVTd5q$PlM!aAz+-vn=Ku|dHP2*b!)8Qdq(1zk&IH`3mslT zhdm;N%2y?qtFeZ#^GJ{Z7{DPX67jZ&2!&jf1d^0%r@&i>i@qhlV{KRN(0*XLTTGvQ z*v8y_#Dg4TisbIc>XWv#%^`yx2IP*JZBy8fZcEJ=A@N4P^|@7!a$3s<6SP9f5Eu>KplBd~btE{`Uc3zKu9*p!4kFRbx8CVk&WRG2l936C zqO^CdrCJC_DS`I4K#Qm(yP!sE^=7#U)$5e`RF~3_B(LY!U+a#2TA5Q0P((uN3v7q% zsCJ!+e6vnl1JkoF2K9xtR-_lzKw6V5ryKek2QYd78!*v=^i@7l95wIPt@zY^^i1+H zMiKEEr@L))83c3TLoh_#f}`|qcku7wtyc8r4Ip1yR`W-3=L?t~49pPI4)AX0%;Cg# zZWO-1GMq7a)*TmrQymdsd+wyu&93{GApFzDV#CGT4B+zry$srEC{a#rh#Ki9i^8c3|Qq2_yD z&sq2^X({+|pDU)Ub3s1F&9qH6j1nD6QEq|}vNN`Y+7BW{tPG{pQ;X!Rw)j|owQD~$ zpGpi|=)#$QRJseTu>@te+yJ`eE;RmusZ2WtZ}> zJ^C$A?;g}* zyAQ`os;cn-aoxfsNAc!nBl>u$W!9VngNO?a1n^HSI2NXgRe-;ol$@-grnYr>IK1UJ z{KyT+F|rB^X(Gj~tgVOs{K=smfI|6!S1Vs=#gSTD%MsYwvE`VXo6AcarNeB-3c0Ii z#zMZkK32eb87a>1bLU*4*J^EJ1H4=!LPFq0QNDWzD10;-)kQ_Sd&2SLRIJjZUHCFu zTOYrn;>mW87_*(9p93%Z1w1^QNa^&l&&a<@wH&0-7X^WZS%TAAx;l%H)=rce+qNiY z6C)|#KtfvccqRDm^k2G!P{QNqxJ47uWiQSJ70-uxeP0E3o8d;$)orU4rZ?FZO*gu( z5I9hOJ9koBiU7N_xN${fAN`SKOIiKI=IjVgo@?qEf1b*pMUv6qBczjFu%maEg2~8I zOydht_-{s_8D1`V37Wd<%fO*i0{scg_BY(yCP z+H{_;*RSn`AP4O=yTTD^3lP%Pi2GB<7G;)RdV{4)IGq2CQd(01F6dX%b@U-s`JC&f z{e0alhfAxdD6tlcj~f0-@2Ze!KdJ;{^RCouuJQBWCZ%aFy=C|6FpQYg$*27@GjRoJ z$wQ%pOJM~Uv3x80&r?U$X!Xo_eq;^J6Kq*RBOub-2a;PX`Zk(FOGY1ymZbWPTK zL8lHrGw*R&JsCNpNCiJclk z=iG;%dAUAA%tEWt9qg=&ykAo5Cj>DLKRT;pq5Ds$2_KA*2a&R(iTrDX6ol1`BpI77 zR67bWnHQ`EOjDOJQtmatE{A?q^&J;*fdT1{NzMZ zUVg^1E)Ws@?E0F8jxO~3cXL}?W_o%+SM-)fo{^cEtHRpI=q)L!@5$=N40${p91|`4 z*RRLt=RLNE2)NP&0rC3GjC$zj_0`p}@o_^VBhlP(bB@%wI2AyDBo$jQUjZnVdbb>D z(xmCBspJ+j8yi87v&|#Q zNx)wMAOqjsdGck*#~RDa%L6*3(~Um4d_b=WDBD1+787|B6B9@I1=;7EUESSradDTP zf*H%*vX1|5P=c1Ui6RE0ir-71bt2>V90MXQGNQ^HL|**Nv{nn#Aka&Elk!8tuQV%} zleBH^ugqglq6h`MWmuOy2aOBJ?jgBN7b zmzL|*a5wk9Nbof_h`ZF{`V9N8)ir6J3m-?$q0vO8M1o3h}VTQ+~6(1LuJ8h}0)l7OX>jQ4tYS<{WBjvwC1#3yX2f zIuQ8}Nnp2!6CuaK#>IW%|0!n|xZ0`%0|PY}XZD;6YyqJ$oPT&YAVWv_>($%aD-H&O zt27SQ*Sn0_`1$yd1F(=^0ja&iUYpWpo`ets|In$ z6+3KNqrsS89pX_xFgVy3MWU^xrKPQXuc$!qFNkPM)KgdgLai`r=k>zMbg+~nVn*3V z4PAz5c#z`Dq&`GtYVMGXS9LnC4D5Ok8r$K_IcX+V(!{1rr)U|&K#Cm>Z+PI$IZC`p z2@K}uCqL4})lW3DxaeHn?VAyABF(h5{yM!{uo~pg;xxb7gsb;rk%lnS4MS8FULA18 z$jTJJ`BMmVMB9}O($})yE|AEr=x7xY3wKTm$1u|G-kwqQI3K*-vZa+}f|bqU7WaPs zcK3k@l4f{^D5;MJW{3&W>`S4fE*-3By@K<+Uwr1O`@+~iX9Z?&c7V?YBm~?`z{oLt7I|5OMxr4{`I5i zjXH=gY>(#-x>FEhC3Bs{Ly_mv)7M+>KL1)sw|y|6^#w>-H&y-i2G!@Wj2RKr1tP;7 zBQD?crv#iqftVRm%8)NKz36(|LfzWtQ_<2!NDa(57p7%N z6#B942r}d?gMoxFG=}iXXJ18_w+tn9#)p?XuY$kGQJUSNp~TJvwp)kGWr$# z;Jw$SA=RAz=_%eFWwSEQ>&V0uc@)U~rxKtTW&JgWW zPhXeenpVDSbXccJfZ2dFo3pp?bOvK-?^XyzerzxR5&lp2GS-?i3j_H0K#8Nl$irF?G 
z>n+6YIy}g$fX1HsX1BSE0P$R16){HKYuIe`;8rTN(bdVx>iI1%FE6+CUkEStnj{y&ioZb&=;rN=M5MS{$Ssa9;aR^_pY;^t@@n%V{kOq;|Dd zO#{Y^LKn)?@lyM7YOR*g7VYMTDQOD_+9?9jwC*189%ktJ9azV`=pOl_hKc(!`!edQ z&!-3%VQ^8WVy2oE{v^M^req}59}A>R60tfN$jkLt(oAY_Cr$M)YcExGn_-1pD|A!3 z`rM-h>g?@0IB(f6B>kpB(eCp^gQc8Fh!sq4A(=;&J!F6 zmsLM&Kp+te5?-!Yx`oQ^dop8?$We*XbDhBuyMwP^R^@vsG*IeAKsERJyS~^?y{w|Jw5D-M#@UD5?hypup1Z?#;jjnVN04QcbRM+ zhf|-8SSP2wOTh1#9f^|v+P1^4h$rNI_g-{n;(|KEMX_N^ygs~!0U`Fvl>`oi9cZ{? zGMEP+lWlpVSlX=al>D%`KRBxJj_Aydc`k*{v)|*dL}hpUbFNm+?8u+CEk1So3m9E1 zsjI>>jHh|Z#;#!55$B;6aP0thf&c*KpLLz%6-fSFv&IwSl1 zYn3O;7pA@F+uGU!o*){6A?SIrTi!Ceu&}?irKYUhwPp|a=d|F}H8e`XDt3IST3lR|mX^jw`4$<89^NxBJWPfB4(Ky_zUZ=VTwbQ8roMRb0x0}w z!_ENFWNE2U%{*{WNa^&Omuvg`mZ_mlb#>E#+evLLP!aO+@HpFb)()DOm;lx%KxQZa z0euQ9J{q71uBn+c=WuM&@$&L=!ci#}&#kJj=VAih=7(=1^Vaps#Xysj!fN;;X%GR# z&))*%Cjha?ke{8Ox9~l!R48h3=??AvN6J1XOc~@) znKi`M?!25W#r#KDN3Fw}ftfCYaG3?@N+v!#ir({y8cADRlIVF)S)hZ^f)m81n?wWQ zUpXvz(1BDkbk!te(W2_*=oK$D#`{g-Kr!I9pf?7fHyvT!)dnrWd}8366gV_oQSidp z=U~3(N;pYMC}kNps`MH2^n_gL6d8l1XXo_Yo%l$WTUdsfME=peM(dx%Jm#5z>sYC=RdoEXXvl4D_M9$JOGron6qWEu7!TWNcEOlrq<{}fH>NNUA}L@ipe%PgTI~1= z-=qWSG$<-B2LhwDjWb_4B_VI!>e8#kF2PbeMt8LrPY53U-N(j$ZH$kdlT)MP&kuP)rH% zA}AmzkgGD7yqG`?%(*un;-BlT;kx_<+q z9Ue{`MFcut(&Sx}Cccn!dQxA#9iSuXKaSi%?yoo%pWs|wdq^0dKnW_ke_niBr-~_J zRxNj2Ho$fGyjFDC??w~ktS`mDcj8;G2p_LLtHW6Gb_puW)l(U&Bj^=o*S!Y5-sI9v zqg5l7Ty|f@&@QlIn2fHA#3#YIa+12=iroxP!1^cv)SRMlC+ILBqE~w zjJtW`f82hT8f^r7H1Q>i{!v9wL(l%1%;hQ{-`u#gh~#PH)$$rh9u1uJ_z-RL9b;5+ zrP34W8O=pnq~P*6)K`fJ*`3*cv?^OL zl$n!z!J+$4!O#U0CeX9tvf0fSVh^A11=HvlvBr^DX~ziAa4(WRaIIDKM@@O)83)6n z8#}WRfO8=^KyQ*`C-ScO*}FEjg|-%asC+~{Y-9IePI}S>R~y3)>{)|aryey3ME_=f zWQq$re>n61S^A%@hX9WjPzNAXU-^3pGclC|?PPYgKl%5g$OO+Z~*B8(4vOp)G#1q3h*rJ?f{pzf>|JV3SQ+65BzgAvy8(o z3l*~3OpzuhCWb1a$%K8wpNP;9Rm?!-+}zixF>d1Gy_`&?swF_Ty=bpmQIVPT_p>=2#Ed_&w$Jid71m+c~2XJkW+&{_SHtr|>WGUUwMma`~`Wu)bh5cLsr46JHvz zYQCg(IddfeiJ46hi0s#1yWN?A3TowTLG`(KNFwkZwxhImJFK7dn)yKGKhzGGMq58} z#J}RsOrAHWy(kbuX>46~fBN<#_~4qGDx>|HUn9*~io?iWRwvz97sS^1{(_|A=gnj0^6coZ2z#`>;ZY%my}guG5-qxq z8mGJ<-H#?=s|%WN9Dj_tJ%S>dM6oA79KrC+t}OpNoge4D3stpX<-5*mv9t#lQ>Ey? zUCIdl?e&RNxbcUhazQZ&k}-N`YKB~{%gi3&Z`U2PeKj@V3L1zqvXUdaBEipx;2EG;Kl{E8kMMpN_=J3>4e3i5t;jz7%&MTwZCW}r0w&P^R+3q0@c+d? 
ztAK=0!sjeW9jAn?rm0yxyHXL}((tWsqj=gf_C0`60=T1xxbeTMZ%i0Lj&e4@ztb4r znTnW>FMOF(n-NRA?i1>j4iSbVM}aaVlG9&3zwKO3E4Z{tRlN|lLj)UK(kW?cmH2^` zZ2`7`(}=L0*R`Ss?Or6hq`xRw?P)?SfU5akKeGHulr{E-YT#&^g^9}%;HkP_=8K6N zsx?`g+xwA?MuBLIrq|F0x9QuWe(b$@V~c+FbAKRHdrTte!f|T);dC~19WunWMaFsk z&HqH>%-RtX&RpE4RRh82*RRI0FNC@iOY`Hw%2^djgdotxdr+WZ6sF3RH3OoFXHd)t zgSS0ds`*B+{)`(5{_tv>Eby>s;rj`$0NB1c>L*R`U{sXgR@HEx4O=!ZvbQ8HjPMKC zjfhnenrdex85*_S)t(9m5rnsM2xi4b|MKDW(ee7{3hX!#$T&HKA>6XW`oo1Pil?SZ z2CjIMCzQxa%K{`)o$j+lC#sp-SSSpy$U#H{m+@BM@bGcl{`oR!||%%2&~1Y-xB9 zmbgcuD*DVwlnw}lVqPyFMJ!FXH??>iHD26YHYJc3{kkU0H`ULKUlM@XE~(MUN+SXb9m+#EzM!eSE#w=y^x_!5MY>?0mB zr?+er)@cmdMb{O~^C#k^T0C{6%dugmQ~s#f-ap9X6?J$h-oO#5ou$HH;wcv4!rDqz z?voqB_t8ksrJKZbprxgU;M3%ce*e+S_ODfsE(N+6WMoHqcFeNJyH)W3Sd^m7IcPVw zA)sPaRu$NF=;C5V?-g-0S&ME#_EdG+YLoPQ&tDTJ-_H>BtNk!qupFf5Mc$%iLv*>0 z1s*KGp1sC+<(fCuwOxNNxbhbvnu`27<2B=@Mx*a}WtvqLku#`X-spUCKVSr-_alYE z46m=(vi;e2C%d}a+wmN|u3tR`7W2H4xlW=fj0e8Rgw7_B*M*jI@>k`3VfMEy;YLT< zkRMnPPxb33qv5SjrMDlYj@^6(G@mTiNrZkQQe4n>5M=-B_5=)jX}$v}j^qWfME>p^ zQ}AAGMumBghS%$Q03HLlQ@8_zy5~1RO3RC5+Xu%u_@C@7z_IEtz+Z9b80?f>%d@RZ zj-8PmF^>k#kH#Ecj>zlycqyIxj0(ep=JNciTb{293m=+0-1L1lTV~qa`V|$%5{VeU z8#yvt3$mMResA&<+VfML`lxa3!8j4UFS(#;)M=Bp;G2ur_STZ7oc7BFK|}5FR!{ZW zq1j+6#8}q+iQ+XS@n$tP=ugIB_L05oxJ)bJ0#OCuyKI&r$TTJyE==z2qx!^hdAD(U zf1fxiEnlC$2;8LI@7Npr*cg+%yo`45B*t#ehi^(z&f0}S6hB^HeRjDUwSu7oV(0Nr zw_T@JV!BH79Z(>0YDWL={|x3b_vYMUK&j!B7o62S{-2CS^#odXW@^P1-OiPsSm>%cyg(kDS z9ICpTbSMTHgt%s8)NTy5hILm|7B>i#FTJGKiuxq{s{E^3boR&HhCXZ0hb~zl=;Hmb z)0O7;Jew<9R(Wc$|Iyu}oU1)$l(X18DdiPEJNn7z@F_scxq9p&umF|+KU@Huj2DpM z3yUg@^65X{#1LPA=5Q?9ZBab#6X@&?u12M;Vgt%oc3GKerybv9Gvs{1;WA#7VasN3 zk{@Xmzi_4O326YYw~ioFIHO{NSJ(FT%)5;yBM~EY7F0DS3-;+>P|wcKLe&hqP2a@p zm6}`NPS?|(&hG0f)kw5)x*5iw_0NzLrb#NI5Rth?k3~xT6t?lEX|r7VKa{-%P+Z&A zHM(&NfdqF55Zv7@KyVEN55e8tJvhPLH9+ImXmEGe;O_2!pL^~-_p7h|SMR;5u3}TW zsvCN(z2=-_j5%iIfaK{&o&on_dPb=(|A=9;?AL4i>8@3QK=cRou74_qzrmra&* z!A6~j2EoxJgw`^qT71&pOYOU*q9}#zJLd8kPnLc!`|9wY#;#2QKzYUAAJHV=X4K^G zNh+&* z!el*Xrzy$dGGTUIp@myNt`PbbcCc(LRK0@_D67h`JZb8V91&DvX^Ho_+7wxiPVvNe?GSPkK zX)n152Ja2&B9rCUSjI`05qC)$A@H?HNJ`B^lomS0sO;K9({=6XCOEA2+J|XLzwU#e z`ANJVyqni-N;wJorjImx!!RTN{pJ$0h22%MxPEYNSo~O2vlsKgQuTVlewqT~bC5Q# zx2)*Ko3{mKWpa;ae=>D$|NN8|$Rry+xJ*lAE@TQPy3$5hSJ5)}s zzaNj)`_a(XCyFXR4;hSjaA45G zd~9`a@JEh;t#ciIPto#h=JsvB<~bc1I5&E}g3y;(;g05aN_*u7?eq zERgse1H&;Vr#xCS&MR|O*CtN;%fP`cHYS#~=e$=FS8nbH7?|hRCx@}F6GU*kE9VU| zk2k#7D+MOA05*TS<xu=*v=+D%pQ@GB& zBxM$_%s&SBKY}wPiQv+U6`0Cy<{{S1BuUC>%@D`+F%w_T)5b@-kOo=mN6+Oa)dO$J zE6n8ql9OlJt+SdjuPnD_r^rCkm$c!zCZQ*r_1oN~yMpUevi;}LsMD;&r|Mfi$&=q1 zA&5E6E(lq9w($SdJAdb_N4NIU z#-po!2lv)C^$OqfO-Y}}WBAKd6dXa9i>>{O3tHX`PES`^SJxkPbrA0=S$RQS74oa| z^K^Ea-;0aV6cl*;-Kfre|g}@+Nk6ak2z6q+(wO4nc@H2vLAnmqPn%_ba6HPUK~D zMx24$1`~pE`!Ix2qR+Eo<{mgRD6+#xvMR1P6tnYYKnWkbNy1UqAsIZi5xgUj+ju-V zF&cXu2Y|gE5`n4{b%)>#=+T%U9+F@e0lY3Rk!=Nu6HL(Kq zVN5+XvpC7|l%%O!$(FP9ut{lt8+HhP4=)buhDg|`Y9c1zJ6hqx7hln$oskbGcs{lM zN+Xo|8ghMwlZlJ@XJDdkmPGe__CcjOZLYu>L%oJTddNV*)J(cR-^O3c#@T$moP z)_SuP1Y((Lj7VnjxEX(GZH1NRB(-3a6KFh-69^(oe1iz!;_3bzYk7W3CR>Fj5Y~u1MHf54*De;8>n})b6eKD(6fv5(@uq+4U6%Vhrg8DZEOSzGTTwa)&Ek>NppW?)xiKsQ4c1{ny?@Cl>uW3?e zJw{uosKkTe8+qr^X@Bl@`mN2qCa)B-HIU*pz05y8kG8U%K6SY4MueT!7sSh%KqfXP zQRJ`Xg1Su>PSdiK3FS6XNe-7%n%0*L8-hwdx|fu?v^9N~37+)Nw6HSx$mD6|?)ZZs zTDf0SD@?`ZrSd@5p&=x(QtUf+`>Utt4+1$g2lwM$i~8_4Iw1p;`UC0Rjfz4F>cmUq zx+y;PE4>T|as>i!5rJ{h-QhGYi?61>Fgml^sbHJq!F*&Ha0EIS71#HlTHs$t4>6g^ zSUNNQ02aw><3q-M0j1k*rIB9;go1~d_GwhiL3ld~iilK}%_^%UXLWTH2#^{c4zY^G z0wDC_@v&8q=sQ3*HU7_^jusUdXUoezPVYHMYI z9}!0wI5-Y>XHpQJrD!Tew$Q8@ImrI@grZ5?+Q#O!8w11Od#-$zAPN8ijVcK8Y>($) 
zY;PyX%SS~E31(7AsX&rX0RXm`PVm_p4yMC*-vm;wyc%Q9Eh4LC@McPy`lu6p6v1<> zhSzs0pN=Qvpe?X(O^YjPNXrF@S?4eDx;EL?G95T*xR~pu3v4c7VIerbR(t$mt1*!$@w|FO5x8lpxwQ~#y-hdR_S=eVvI?w` z*^60+>2md`9&WPDdoE5&92`{kd8MqG>!-mmX?wQbE}LU>gawMdFbbouDu%c{D+{YK z))%Pk9#FC#YjoZ0J?e7$pvaIal5bz!eTu8D_GZ^gx2`$@X%wFCO0H{|e6rlQcvO(* zIT^j%h6#u>XTy3Asg&CV4)p1`Dw2)q_`xm)AXR3=<*g(&*UMGyGrJNfcS3Zjv+6aq z(OU~Co=TR|FX&uu3m=FvhS}8D+=ba&-c@*48~@UAJBS(^bo3J=(G!T15!tl5TDIPj zQ5ge0zSLd4y&V{1)VNusZPclYXLtzQSC zDcdd+&o+km2IZU(Z|N9t(aq+ov*w1;tfizL z3|#V*#;Ml#gDld&y`<+`yI_8X_ep(PV5eFSPRe@ybub}+On%kh`M;A)Q=Y68NO@vY zes|Z0wtBB8uCXp)2~n9@uxkPk%5kaA;&r2k>EYfD^6XidANU8r$U&Lf*q~K4ikHst zaGyb1BOz|PIH8}A0U{zD6)v+n-f}g_w2q+a?CgA(({EE3`dz{K`RO!tr7gyJvR zulqX?Eb_I4w26_JSWrZyw=OXBxV?Mq*c9p#M7 zqze3Io0D$f87Jm|J1QxI#z#uo{g5I1+up+I-5^>2iS@%+iS(3 zzvao_JFR+GHPZQ5+&vb1Zmx|*WNjo|wH9khQY%!42G zx7KSYv1WZWFVCcoSi8=ly{&NVYDTy9-d1E%C4>m!%{duz14y@8XBkU;Nz_D-gPmXg z-UOqqe;Vj`8~SH&Be__xHXvf3OLGEE_UX-e;glHz*g9Bl2((2 z>k{i{ymsZg+#o~w`diUdw>Jp3xlWVpOA!euXnV0qbZ+|Cyf#h0!iTEnT8*Xe3h{_9 z-yb_rFxWmsND)txXEC|)e36HlZ&7Kv?%2Sq+3<0Ftvs5A5q(!ro|U{3-oNlP+amCt z7j8+O*7@aJwwMR-I)qY+_SVlWV!zDO&`Xbp?x+w43#<)SH)o7aZIz@6KGSH43u~Mc zM72iKyStm8uFc^PKna6Sdv-ZP<8>4~7N0DR3k^>BGr9k4whYtF&{Q7x%%3j~>80eZ z#yo_WxkWpL#|b_?k~+Rgege%6WDsx_!8dpJ$zNR<345cJ=%Y5jj(hXZ>>H7kE9^fc z)W7?4pwgx0y;acAhJz>aco{CiD_5elMY#v_U3`-r{Y%-eoUm{9FJ9VgZ< zGj&8aGGvNJT-t*@|{n?`wr z=e*9VLavgNn@Oo$#X_nBj`Y?Pe0ShPy$LQ7{tPX)OSuHg0wH?BY=2I49Gyrg*effy zu82_13cN!w%WyE8k|@0`P3|iSqNp2m#sk!IVgfp%B>nP>PR(f(Wcv_NiRRN%w2h_1 z-*0`Xp_szN8B1451LxB)S^NL?G=cXe22AZIw!NWx3@<5rI@NNsN^vk4_iT7bO(K?| zx+J(uSR05fzB4Zr!qYNkTBHk>>)|r&M-;%w??H~3NC}66hH-bpr2~mHg7R_@ePeS4 zNc!*v2KsB*RTtg`2)$g8vcz=#kDwPkGg{GKZ;Nc~y0*&sX<%LDJ1>BqU82Cpkd9%F z%%alK)Z}twH_9=V>CBfgmt?r}3+$|yk)b7n`ZoMc;D$ZG zT>IlqQuA*^<=WKPkDInQI3~V{Ck*j~&#hPBM>HFqq-?_sBGznf`=Gb^Rz$E+$|>oU zPL-4%Pi)X5#R(-ynLe@*;7-?MPHx5a+NK^H(Jn3l0Er)|D&+Y_#Quo-20}x#XM9(h z^UJPDVL$jTPT9@ZjQkl=#kDQ?_G?SLK{5(;K4O}*!<7X91o6Qj+V7uE{d8+(C${G~ z%UikmMh5DeBo;0~q=DA|0}}sN-ZV}gGUnoLe-kbl>#{Oej-FIhPRmpw`x$qCdz7r9d?ee!q&I^c)7GhfOelVRP;1MeoOUTWfYqQ4eKqGa7{*zRSJW_(qMu2i# zR$l%EhEV-)-1fx*ky#m7j8a3Lsry8JJnrhMrQsj~%JChzJw5D$2(acX0AO)3q1Rhq zP>^v1F7lvxo*t}{UnZ8zHm1C^Ojgi13YwLOnWFh~CdsJ5>|hyKK^5)f=7of_r*6WV zHYg6ik;-2H=QQIEj9WNnPZ1yiyd?KBp!zULssSsohlQafx8{`C*q#1wz6t{jQc}4RFlLr7!hYZ;*WcOmUA? zPmku?R z#8D}-VIg$~#}Q;HW7cbD662eWmMQEB zXU@Z68wv&@fNWGA*aot<`(*Fe{AmKnTkq7PbQsbcuKNd44>hgFEu)>J03gy2c<&3# zYzhU+=)Vt!2q{+hrZ#llcj8u}!|B3)(J>j3EW4N0wm$M++!U5_eln@?Veju!bNhR2wx5@f!+Y%Q-lYrqyaPT@epOWD|0XQds*47UjFcC? 
zUfw%%a`HDVdpcpIrxQYCMz&;gSF`*wiJ}lA-!H4Hnd0IF*v^ogd}eJ8=RY5{q7E-$ z2Fr{K+7dyU{094Nd=C(=O3%1|W0lvPu2xXOTL5pPt|i#26zL!=gh}t-5RtzR5x@b$ zCRc+ar*fM+FE4ZIyL3yK0DXnVS_>gS4{@i3&$t2e7Zf<1B1$Gc4Eg^osZt5Pj(&3@ zv{5xLg*2fDF~0uIKgF=TC7bI@?yA3O!j|y~f$QncA@pkw*Cb3|+lzM#9E>Ow0bn{J zIqP$8l5zfvC~^9LJ~}>jGWg=5I(U>$RQ3;}EKfwBK9HRK0o%5?BugQJLV7*qpuU>l$@)=JxO7aN1p-k0v!U5`jkZZww$0 zEYz>R9M)Qjm?ChA`6zNvV~VT7zhh8c>$0q;IS6o(I=EkKG6{wKETFK`PST>9!*f!d zo*XkdW57d?yC_HTY0W=q3skIAT?x#-+Y!7QR>m~hDK%5)@+3eqS}M%A)LU@7wq3X(;V@}?MZM_!L~uI{ikBW* zoLi-~l|3~*9`_4@#4%(!F3P>K^p#_3agQ2M5tM$cl^m--mF*7HX8_U(v$1XK_F+p4 z4H4=h*clGiqlvXd8Ilpg7bZ1h_+D{Fv5t)_wV2snJ3{a6nZf}n9-`VLYS+C%B&Wq} zSkfOb5+~>%;Ox1a7(V?_o^r`AJOvU?TR1G%2veB9A2*Q;jH1iG#Urp%NqRcTl@r(T z%DHa7k=!wcf!=s{OFl2=@KVs}(LjQjL-1d<$-lAVVn`aoWNc)q{ks2by7rAUFAo7i z!sABv{`$3eymX5nsgq=gg>2;Qecs{s?r_eArJ~*C^!GdR@0qUOt0AnrXwGK^p{ykJ zMjJhQ%cDP42AdHQ(xo$MYO~2|f<7;~x)mfWA0WuqoOzv*Q9=k>R*Sd^K7MMw%^I8K zS4UM<3K8`8p;wUN`9Jmk-s*?ijQ0$#yx=qZQY=is_g_3-_g_37Z(Sy|MO)sN0Bvz7 zK#`wH3x02AkQl~)&pR7YWbNm~@UpCcZ+aH@FEklF@2YvoHy`2!uMxW|w7jN7JIoO8TqjcWtKGZ&b&ji4 z_-nlPM`3bHo4h?;L0$Ob=J&7-YCGCxlj3N&I2I1fkjxq|Ud~^Ym6Q4{?xg_JIk~M( zZJvh5W*z@+C{v9Wj&Q7TU66gRrKLyf?T;PHRq^9wKe3R7qRe<`Us}r}bew~F=yBD( zMkSEe&5wOdTjGT(CjeScw{N4E=CIk)Jm#(&li!mqLUlTssR4FwSs zc2*00SewI!1d|+uvCPxE1s5$Q9sXwhW64`H4CRueF`?V#vf8GmI*3H>LzDgU)Hh~G z)6zqC=+ufjM0!d~R|?ig;o*TuQq^@3#|FXi2~Gn~Y82*o2c=Fs4Wjz3ij9YYQ8W;! zg#unTD~QU(jpWI==O8g;<*&w%SEv6cN)H-953cW3tIMcSs6aQC2~i%~;^A$!d-GoZ z@_6gRs|@v@#yiyW^-Aj0aD=qnvQ~1@URp%vei_D@@RK$#Xm&L_J#UF4UYgt>3VtFJ zO5QfFB3NHQPFgL>u{&OzL@Hx!+8p*-lJd=ZgN2`!Qksi)1D~-lq$8)j%;1p&=DU!~ z+{5pU;VTEM?{HlL?ps^l!3U@&fy2ZEhWjXN_KUn7XCdG%Xmy1~2B+PG{2I+tTvchBuwaf?|uqM-!j;D|K-WUcF^tUe}P0aqeQsT`?;hc zxn>C3_U8MLXoa0!6pk3dgZ%JmXFtd8^vz|2FTpN#FkGuKn zzCn9%&+P_Xu2@~s}!FTZbL$;(O=BAt&Oo2;M9 z7+Ah^laT8Zx_*%&pE7#Vlc9o(xqu-^WUAl*0N!+W}i3Vr2WOhiu zU)nsiI4LVi8*88EGj&FSPs;y4cZzGubV(T3p4SMz@GpbN9ai@8KXfG+{8qpfFgo!) 
z0|-8rSeKR!FFUgzCr<;}`)nrxYat#wf?}WDBsY8f;|`-tlJqTM$pWpzEa_;-o;;Ve zmpzw}0;E?p8Gl)@j=;c z%d?E3WBTJLodcAwE@WDA^pXvmqB~4r4)=Z96!DngbNe~Jj*8U9E*+Qd5ihQ@qD7B;V~Tydo8or9WzQm%8|wWoL^7}26nCh9`>t4DLjJwlug#i%*f z$LQFFPxW+b8&{WJM*n|X+iMJk@31V?%5qmzg*+z1C&0n!`5cEUYhH9)_WR`sf#n0* zuP4pjv2*WD+KB$5D(+vRl5KXiI0>plX}v}?N2YhPbT=g8zL=Cft}p3z!$JYFUZb-K zrwjBN%x1-ds)|hUHAsLSNV!zb|jH$a6Z%6*2GrNz${C zt=T(y>%CIAoyFby{B(tcBJqB5FcyQPb(Q&gjnOhJf*i}Wp=O;ej|1Ka#|1X@by|%{ zpcwW@O3NH1XfxfukV9LnJ-X|#E@1h-wUS#&UXvc*=X?^}^g!Sd*-vWsu5C+bECp&a zahj;zB6O-0m+93lQl6l^9uaCjwR_%fa~;En^Lnck0<}09v8P%Mo<;%d2Y{o=-d=)F zRFzQW&2a+i4sAV(?t|q|tHn+QLuJq7E5W|4p&vnLjw_S+r+XCw?F$?smfX=*I%6;rOM+YUZS18 zOLGY`L=bec)y;2PiPCGJwCf9!mtk_pN1y^}lTewK^mFT$J0gb@?HcLCJ`J$pZFumn zt-+*1f$4zb0!qq-!(OU{{onnLDNl2B=emxC6vxfyk{6xtgHAr{m6xHvzhoN z6IG8aiT@}{h!A2QP$w-C>vB=Gtv}PcX6kTDHKt^f$>ODe@O+~6h|>Y(_Nn-9Lh$VB zNy9>!*jbMliJKI01}XHQ0Q3VZMK}?; zg~g#58XFpKi%UVXN_9@tZ*MJqPSlq3PC}7sX@B_ovSJ#ZVoCT?80y)#3(Yfbd^*1L zzMe^1)?pkIxHVkyI`Q~oXQp5Z2+K#H$-?>%;{Dm*EX%-nXy@Z$`WQhM8?_StJ_Z2k z;By&lGYL>J$vzoa;k_09H5|G869rIfcfI3AW|fCSC}?$+S6N@2UC8G5_K01%03Cx% zh!HFtC*wLCTa2%nDXW>hNo={o-=O37>0J%gu8KW|a55M@Zx4+2v)di)xsfsWgKTe1 zO;WQ`XA3jl0YLeO>xWp$G0{>B3|-o;_!5)Cd)b3c_gw41dAXzgLLGaT(VXxqT&RhX zUj;&MiMN)lCI7HVhda!)dZ5`U8J#Y)pZdZAqHM)WFJ}ir2O^6sRXbL5S={p>m>4M= zn+6lyO8SRvc76e~mrF~=t4zgo`EHl7%{=V)8B}gB6s9(2$L(UQ5XZ5DjRhhf&(PUL zJ{S>n*uP)*pT;RG5&cR3etjP5H)M?rmjCb}3o%lHJZ`Na6CoIK&l6trksMAWrUF%D z)Bc*8gL1)>2c2SY$c8Ohv*c;q$&(|evy9*4aewe6NDtYnQJp`OnrXz%37jIk)h96W zp}#j+JE2!07q1{@OvUeQ+Q=O<4lFXUpp;Rg+pR}ch15l7k==V!gU#;p(&7BcYG=C- zn^Gxe)eYXS6>W~>AL54hSGqz=%xV}s=2JgAOxY9`VAZ6zu(vL}13_jj3Lr&ChY$aH z?DH&*Tp3pKo`EAZ?)4?BuRUquPs~Yz&cOEmM>nCaGwK_zjE2g_1o}ma-M{J zt-q+SS*B0JDx5r6@VMF5bM7r)vGW={Bj#n|Z}y*Z@_s7RgSZ()6SSQ{tnM~vPavw7 zi+=FhqB+c=4dsggf()8Hv1GSC!~%5H$#e->PXz4Ln&q~$7u9qK|)v_KhRqF~Vg{qM~{_)eAq`|~fX z#T_dFS;t~rKe|A1w}mGE*_V_LMvY6F6P3$9((xm!+hLjCGz9~!DO+9IbfT5G!M*g~ z_P<^|8LOhvZtY?sr+pFGpx3E)PW9Vc%*1NPY+w0B_dd@6EQ>!I>a{QR<&vkQDn;4H zqi^(A7bkmX7lA;^exygo#AIiWjn(VM(XUtA0Ye<`TY^F?Tu#?FRZ}pD@lw>qY|8{B zQJ<`ZtWNLv%s6m?euzo;(sJJ~{`1i`ZI*`ewXimN+LmPk*@_7VCkiAJrWFRYDn57a z!x{h+^3jhzQ+jZj@SL!=*WXofUiYbS~B{iM*-ILy}`xhhfHXPdahhw#f#o1Y1*afu(#VSPY2-N z5^C|=*akXfBeKN`91BFSic6!%YanJvgP3l8)v0~i-5X=lP?<C|^*!=)acM&0?|ZMRVWe4BRw1-&)Gp;EeGORzw#LE*tXW--|D22I(xN zcpoG>v4|?trpb7yAd9c5^{pZ_U@3dlwc-}8ej_)F0^o=Dh^7e`=J)y#Q z$@+cZBSn4}Tgp($YQ`sNU!#E z{Xk<&VQy7514IC8;a@7Hoe?z!apWUaM-YKR4OO7RaIVI)4oy$8HL5(I=L5;JnY z-ZJr7fCEB7rzW}daaTwNV4*ZhRrJL!WQ4gEL@ zUduk?c7W^JR{3i3vp97evLSj=y_m?K1U#`hgr@re5WI&JXZ;V#wAm6>fyJ~r9vzC*jZ zI_9w7A=Fkyce;K$_u|UB)M{>pgce7Nh(M@>oI$rlr8HMiv7bEAFhfM$<;ACRcLrFM zB;;)6L5tV-j;jw-{6fe8UUcJmSAHP4dK`RH1F_Z=@~4hQTzZu$yy{|4OF8?~e_&V` z1+p4@I~u4+tqjF312KtE#t?NX7&ukT(8y^azs&^+g-bjsI4K)|rq z2n}1PZ+(ZkrZD2&7Z;7p^Hw;glHHxVra4Za7bP4UP%Nm@xSvO-Vsie&CKZmRI%Ng` zzj|d^;JJ%|uWwS}$HI5#rh~F*gmaWIOT;OE)e{=<`bx`jx!4tmNPqXVUWrcaTpFNc zldIL$m2y=5IvXzJN#Fbx;_gJe3@Q`wkip)S5GdPY>+PM@PKYa=sGv}G8rS1^z{5@!l}zAx~EF(t6V^lV#^S@Pl(_kKn=?QK;(zD>^RgN zKyf$HFTXm0NU0j;33c^#+;QsEBXQ`dbDgV5{pEtzcT7czG?=`?s8OX*K>SCY_Ez@p zP}y(3eHEH`wyh^Je$@PXvbK~!&_eH`N$p&*+(sDQTi2Qfvfau>s z#F`Ree)Zw6+ZR;>8HGngHo&II_QW5$rz3DpD}K99GZ+`}3$(?Kv|aniBFa zRy>C%IG-JyXlV+lqdHrIV_N7#HFt~Gz80sA_SO7sPtHMlHVR2T%Bk{wh!Zu*8!Re>8E-mKC}$bv4c7y9 z@e}F1@gn7+I_#u|D61f0*?mFf(?Ac01R+602km8e)yb;W0zxLD))93RG~g=>7c*NC z32g}Z3*)qiN>Iz6v9B+W_07JIqf39Z-*W_zL}n&9DDubdQLRXEF7M4!&2mTi=y=80_UtuxpbE=TpQG{U_We`Wwb`13FU(Dd5S_yqWQuCfzP~B_cz%hW6 z6_vAi66q2EO+#9mhuKjQ)&(~_aau`IsN#6(mVzcjLPNk1St`^1p?R}vy2S);OCA#% zlN%)BLG%28=2Gz{hZ 
From: risingsunomi
Date: Sat, 25 Jan 2025 19:22:44 -0900
Subject: [PATCH 578/589] fix for sharded weights

---
 exo/inference/torch/models/llm_utils.py       | 49 +++++++++++++++++--
 .../torch/sharded_inference_engine.py         | 14 ++++--
 exo/inference/torch/tests/test_llama3_full.py | 11 ++++-
 3 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py
index d3d8754c6..b0b37717b 100644
--- a/exo/inference/torch/models/llm_utils.py
+++ b/exo/inference/torch/models/llm_utils.py
@@ -122,10 +122,33 @@ def load_weights_torch(cache_dir: Path, model: Any, config: Dict):
   print("\n--- checking weights ----\n")
   check_weights(model, converted_sd)
 
-def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any):
+def _permute(t, n_heads: int, head_dim: int, dim: int):
+  """
+  Reshape weight for torchtune
+  """
+  return (
+    t.view(n_heads, 2, head_dim // 2, dim)
+    .transpose(1, 2)
+    .reshape((head_dim * n_heads), dim)
+  )
+
+def load_model_weights_torchtune(
+  cache_dir: Path,
+  shard: Shard,
+  model: Any,
+  num_heads: int = 32,
+  num_kv_heads: int = 32,
+  dim: int = 4096,
+  head_dim: int = None
+):
   """
   Loads weights from huggingface and changes it to match torchtune naming structure
   """
+  if head_dim is None:
+    head_dim = dim // num_heads
+
+
   model_state_dict = model.state_dict()
   for name, _ in model_state_dict.items():
     print(f"name {name}")
@@ -172,11 +195,29 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any):
       # along with changing o_proj to output_proj
       re_attn = re.findall(rf"model\.layers\.{layer_num}.(\w+)\.(\w+)\.(\w+)", key)
       if len(re_attn) != 0 and re_attn[0][0] == "self_attn":
-      if re_attn[0][1] == "o_proj":
+      if re_attn[0][1] == "k_proj":
+        value = _permute(
+          t=value,
+          n_heads=num_kv_heads,
+          head_dim=head_dim,
+          dim=dim
+        )
+
+        new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}"
+        remapped_state_dict[new_key] = value
+      elif re_attn[0][1] == "q_proj":
+        value = _permute(
+          t=value,
+          n_heads=num_heads,
+          head_dim=head_dim,
+          dim=dim
+        )
+        new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}"
+
remapped_state_dict[new_key] = value + + elif re_attn[0][1] == "o_proj": new_key = f"model.layers.{layer_num}.attn.output_proj.weight" remapped_state_dict[new_key] = value - # add in permute for q and k proj - # see https://github.com/pytorch/torchtune/blob/main/torchtune/models/convert_weights.py#L199 else: new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" remapped_state_dict[new_key] = value diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index b0b0ea258..ea626021b 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -23,7 +23,7 @@ from exo.helpers import DEBUG from exo.inference.torch.models.llm_utils import ( load_model_config, - load_weights_torch, + load_model_weights_torchtune, ShardInferenceState ) @@ -385,10 +385,14 @@ def start_model(): use_cache=self.use_cache ) - load_weights_torch( - self.model_path, - self.sharded_model.model, - self.model_config + load_model_weights_torchtune( + cache_dir=self.model_path, + shard=self.shard, + model=self.sharded_model, + num_heads=self.model_config["num_heads"], + num_kv_heads=self.model_config["num_kv_heads"], + dim=self.model_config["embed_dim"], + head_dim=self.model_config["head_dim"] ) await asyncio.get_running_loop().run_in_executor( diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index fca6b8829..0ba5ed384 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -19,6 +19,7 @@ from exo.inference.torch.models.llm_utils import ( load_model_config, load_weights_torch, + load_model_weights_torchtune ) MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct" @@ -284,7 +285,15 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp print(f"\nshard_model_1: {shard_model_1}") # load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) - load_weights_torch(cache_dir, shard_model_1.model, config) + load_model_weights_torchtune( + cache_dir=cache_dir, + shard=shard_1, + model=shard_model_1, + num_heads=config["num_heads"], + num_kv_heads=config["num_kv_heads"], + dim=config["embed_dim"], + head_dim=config["head_dim"] + ) import time time.sleep(5) From 8920a87963d847e40fbdf1ca4c899212151454cb Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 25 Jan 2025 19:29:34 -0900 Subject: [PATCH 579/589] adding torch support for llama-3.2-3b --- exo/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/exo/models.py b/exo/models.py index fc0222eae..b3da1411a 100644 --- a/exo/models.py +++ b/exo/models.py @@ -13,7 +13,8 @@ "llama-3.2-1b": { "layers": 16, "repo": { - "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-1B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct", + "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-1B-Instruct-4bit", + "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct", "TorchDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct" }, }, @@ -29,6 +30,7 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-3B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", + "TorchDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct" }, }, "llama-3.2-3b-8bit": { From 1d7262d853ecd4047753bec4c994b5a4ed72f59e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 25 Jan 2025 19:58:37 -0900 Subject: 
[PATCH 580/589] fixing tok_embeddings --- exo/inference/torch/models/llama3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index b8159cfb2..98dae1498 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -274,9 +274,9 @@ def LlamaModel(config: dict, shard: Shard): layers = nn.ModuleList(layers) + tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) if len(re.findall(r"3\.2", shard.model_id)) > 0: print("Using TiedLinear") - tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) output_proj = ttm.TiedLinear(tok_embeddings) else: output_proj = nn.Linear(config["embed_dim"], config["vocab_size"], bias=False) From ec91e091dca6e1ec0641d9a764d4a8645c9dba17 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 26 Jan 2025 17:25:36 -0900 Subject: [PATCH 581/589] fixing caching setup --- exo/inference/torch/models/llm_utils.py | 1 - .../torch/sharded_inference_engine.py | 38 +++++++++++++++---- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index b0b37717b..d39c61d8e 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -267,7 +267,6 @@ def load_model_weights_torchtune( # if DEBUG >= 8: print("\n--- checking weights ----\n") - print(f"\nremapped_state_dict: {remapped_state_dict.keys()}\n") check_weights(model, remapped_state_dict) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index ea626021b..96acea665 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -53,6 +53,7 @@ def __init__(self, shard_downloader: HFShardDownloader): # cache settings self.use_cache = bool(os.getenv("TORCH_USE_CACHE", "True").lower() == "true") + self.cache_setup = False # device settings if os.environ.get("TORCH_DEVICE"): @@ -68,6 +69,20 @@ def __init__(self, shard_downloader: HFShardDownloader): self.rng = torch.Generator(device=self.device) self.rng.manual_seed(1234) + def setup_cache(self, batch_size: int=1, total_response_length: int=1024): + # setup cache + # this is needed for a primary node that gets the initial encoding + if not self.sharded_model.model.caches_are_enabled() and self.use_cache: + with self.device: + self.sharded_model.model.setup_caches( + batch_size, + self.model_config["torch_dtype"], + decoder_max_seq_len=total_response_length + ) + + self.cache_setup = True + + def clear_model(self): """ Clear out model and shard @@ -131,14 +146,7 @@ def encode_wrapper() -> np.ndarray: bsz, tklng = tokens.size() total_response_length = tklng + self.sharded_model.max_generated_tokens - # setup cache - if not self.sharded_model.model.caches_are_enabled() and self.use_cache: - with self.device: - self.sharded_model.model.setup_caches( - bsz, - self.model_config["torch_dtype"], - decoder_max_seq_len=total_response_length - ) + self.setup_cache(bsz, total_response_length) # setup max sequence length if not self.sharded_model.model.caches_are_enabled(): @@ -254,6 +262,20 @@ async def infer_tensor( device=self.device ) + if self.use_cache and not self.cache_setup: + if input_tensor is not None: + bsz, tklng = input_tensor.size() + self.setup_cache( + bsz, + tklng + self.sharded_model.max_generated_tokens + ) + else: + bsz, tklng = self.state.tokens.size() + 
self.setup_cache( + bsz, + tklng + self.sharded_model.max_generated_tokens + ) + def infer_wrapper(): if DEBUG >= 4: print(f"infer_wrapper called [{self.oom_cnt} OOM]") From a7757d305166d853452355c145db2f99a6f54d35 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 30 Jan 2025 21:48:23 -0900 Subject: [PATCH 582/589] adding qwen2 model, creating a general multihead attention transformal model to route needed model structure but same base mha structure, updated tests, updated inference engine, generalized some llm functions in llm_utils.py --- exo/inference/torch/models/general_mha.py | 251 +++++++++++ exo/inference/torch/models/llama3.py | 406 ------------------ exo/inference/torch/models/llm_utils.py | 388 ++++++++++++----- .../torch/sharded_inference_engine.py | 5 +- exo/inference/torch/tests/test_llama3_full.py | 102 +---- .../torch/tests/test_llama3_split.py | 154 ------- exo/inference/torch/tests/test_qwen_full.py | 246 +++++++++++ 7 files changed, 799 insertions(+), 753 deletions(-) create mode 100644 exo/inference/torch/models/general_mha.py delete mode 100644 exo/inference/torch/models/llama3.py delete mode 100644 exo/inference/torch/tests/test_llama3_split.py create mode 100644 exo/inference/torch/tests/test_qwen_full.py diff --git a/exo/inference/torch/models/general_mha.py b/exo/inference/torch/models/general_mha.py new file mode 100644 index 000000000..43616a3f8 --- /dev/null +++ b/exo/inference/torch/models/general_mha.py @@ -0,0 +1,251 @@ +""" +GeneralMHA class +Return transformer model with MHA +""" +import re + +from typing import Optional, Tuple + +import torch +import torch.nn as nn +import torchtune.modules as ttm + +from torchtune.modules import RMSNorm +from torchtune.models.llama3_1._position_embeddings import Llama3ScaledRoPE +from torchtune.models.qwen2._positional_embeddings import Qwen2RotaryPositionalEmbeddings +from torchtune.modules import RotaryPositionalEmbeddings +from exo.inference.shard import Shard +from exo.inference.torch.models.llm_utils import ( + layer_mlp, + ShardTransformerDecoder +) + +from exo.helpers import DEBUG + +def GeneralMHA( + config: dict, + shard: Shard +): + use_tied = False + attn_bias = config.get("attn_bias", False) + output_bias = config.get("attn_bias", False) + + if "llama" in shard.model_id or "Llama" in shard.model_id: + # rope scaling config + rope = Llama3ScaledRoPE( + dim=config["head_dim"], + max_seq_len=config["max_seq_len"], + base=config["rope_base"], + scale_factor=config["rope_scaling_factor"], + ) + + # tied needed for 3.2 llama models + if len(re.findall(r"3\.2", shard.model_id)) > 0: + use_tied = True + elif "qwen" in shard.model_id or "Qwen" in shard.model_id: + # rope scaling config + rope = Qwen2RotaryPositionalEmbeddings( + dim=config["head_dim"], + max_seq_len=config["max_seq_len"], + base=config["rope_base"] + ) + attn_bias = True + output_bias = False + + # tied needed for 0.5B qwen models + if len(re.findall(r"0\.5B", shard.model_id)) > 0: + use_tied = True + else: + rope = RotaryPositionalEmbeddings( + dim=config["head_dim"], + max_seq_len=config["max_seq_len"], + base=config["rope_base"] + ) + + print(f"rope: {rope}") + print(f"attn_bias: {attn_bias}") + print(f"output_bias: {output_bias}") + + # hack to align sharded weights with layers + # fill unused layer positions with None + layers = [None for _ in range(shard.n_layers)] + + # build layers + for i in range(shard.start_layer, shard.end_layer + 1): + self_attn = ttm.MultiHeadAttention( + embed_dim=config["embed_dim"], + 
num_heads=config["num_heads"], + num_kv_heads=config["num_kv_heads"], + head_dim=config["head_dim"], + q_proj=nn.Linear( + config["embed_dim"], + config["num_heads"]*config["head_dim"], + bias=attn_bias, + ), + k_proj=nn.Linear( + config["embed_dim"], + config["num_kv_heads"]*config["head_dim"], + bias=attn_bias, + ), + v_proj=nn.Linear( + config["embed_dim"], + config["num_kv_heads"]*config["head_dim"], + bias=attn_bias, + ), + output_proj=nn.Linear( + config["embed_dim"], + config["embed_dim"], + bias=output_bias, + ), + max_seq_len=config["max_seq_len"], + attn_dropout=config["attn_dropout"], + pos_embeddings=rope, + ) + + mlp = layer_mlp( + dim=config["embed_dim"], + hidden_dim=config["intermediate_dim"], + ) + + layer = ttm.TransformerSelfAttentionLayer( + attn=self_attn, + mlp=mlp, + sa_norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), + mlp_norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), + ) + + layers[i] = layer + + layers = nn.ModuleList(layers) + + tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) + if use_tied: + output_proj = ttm.TiedLinear(tok_embeddings) + else: + output_proj = nn.Linear(config["embed_dim"], config["vocab_size"], bias=False) + + norm = RMSNorm(config["embed_dim"], eps=config["norm_eps"]) + + return ShardTransformerDecoder( + tok_embeddings=tok_embeddings, + shard=shard, + layers=layers, + max_seq_len=config["max_seq_len"], + num_heads=config["num_heads"], + head_dim=config["head_dim"], + norm=norm, + output=output_proj, + num_layers=config["num_layers"], + ) + +class ShardedGeneralModel(nn.Module): + def __init__( + self, + config: dict, + shard: Shard, + device: Optional[torch.device] = None, + dtype: torch.dtype = torch.float16, + use_cache: Optional[bool] = False, + max_generated_tokens: int = 1024, + ): + super(ShardedGeneralModel, self).__init__() + + self.shard = shard + self.config = config + self.dtype = dtype + self.device = device if device is not None else torch.device("cpu") + self.max_seq_len = self.config["max_seq_len"] + self.use_cache = use_cache + + self.model = GeneralMHA( + config, + self.shard + ).to( + dtype=self.dtype, + device=self.device + ) + + if DEBUG >= 4: + print("ShardedGeneralModel called") + print(f"self.model {self.model}") + + # keep track of current position in generation + self.max_generated_tokens = max_generated_tokens + + def generate( + self, + tokens: Optional[torch.Tensor] = None, + mask: Optional[torch.Tensor] = None, + input_pos: Optional[torch.Tensor] = None, + hidden_state: Optional[torch.Tensor] = None, + curr_pos: Optional[int] = 0 + ) -> Tuple[ + Optional[torch.Tensor], + torch.Tensor, + ]: + """ + Generate logits and/or hidden_states from llama model + + Args + tokens (torch.Tensor) - tokens from prompt tokenization and generation + hidden_state (torch.Tensor, optional) - hidden state from last activated hidden layer, if any + """ + if DEBUG >= 4: + print("generate called") + print(f"tokens: {tokens}") + if mask is not None: + print(f"mask: {mask.size()}") + print(f"input_pos: {input_pos.size()}") + print(f"hidden_state: {hidden_state}") + print(f"curr_pos: {curr_pos}") + print(f"cached? 
{self.model.caches_are_enabled()}") + + model_hs = None + model_logits = None + + self.model.output_hidden_states = [self.shard.end_layer] + + if curr_pos > 0: + if self.model.caches_are_enabled(): + input_pos = input_pos[:, curr_pos].contiguous() + mask = mask[:, curr_pos, None, :].contiguous() + else: + input_pos = input_pos[:, :curr_pos + 1] + mask = mask[:, :curr_pos + 1, :curr_pos + 1] + else: + _, tklng = tokens.size() + + if self.model.caches_are_enabled(): + mask = mask[:, :tklng] + else: + mask = mask[:, :tklng, :tklng] + + input_pos = input_pos[:, :tklng].squeeze() + + if DEBUG >= 4: + print("model_input") + if tokens is not None: + print(f"tokens: {tokens}") + if hidden_state is not None: + print(f"hidden_state: {hidden_state}") + print(f"mask: {mask}") + print(f"input_pos: {input_pos}") + + + model_output = self.model( + tokens=tokens, + mask=mask, + input_pos=input_pos, + hidden_state=hidden_state, + dtype=self.dtype + ) + + if self.shard.is_last_layer(): + model_logits = model_output + else: + model_hs = model_output + + if DEBUG >= 4: + print(f"model_hs\n{model_hs}\nmodel_logits\n{model_logits}") + + return model_hs, model_logits diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py deleted file mode 100644 index 98dae1498..000000000 --- a/exo/inference/torch/models/llama3.py +++ /dev/null @@ -1,406 +0,0 @@ -""" -llama3 model - -Written with pytorch using torchtune and other methods -""" -import re - -from typing import Optional, Any, Tuple, List, Union, Callable - -import torch -import torch.nn as nn -import torchtune.modules as ttm -import torchtune.generation as ttg - -from torchtune.modules.attention_utils import _MaskType -from torchtune.modules import RMSNorm -# llama3 torchtune -from torchtune.models.llama3_1._position_embeddings import Llama3ScaledRoPE -# from torchtune.models.llama3._model_utils import scale_hidden_dim_for_mlp - -from exo.inference.shard import Shard -from exo.inference.torch.models.llm_utils import ( - llama3_mlp, - MultiLayerPreceptron, - # RMSNorm, -) - -from exo.helpers import DEBUG - - -class ShardTransformerDecoder(ttm.TransformerDecoder): - """ - ShardTransformerDecorder - Custom version of torchtune TransformerDecoder to allow for - sharding of models and passing of hidden layers between shards - """ - def __init__( - self, - *, - shard: Shard, - tok_embeddings: nn.Embedding, - layers: Union[nn.Module, List[nn.Module], nn.ModuleList], - max_seq_len: int, - num_heads: int, - head_dim: int, - norm: nn.Module, - output: Union[nn.Linear, Callable], - num_layers: Optional[int] = None, - output_hidden_states: Optional[List[int]] = None, - ): - super().__init__( - tok_embeddings=tok_embeddings, - layers=layers, - max_seq_len=max_seq_len, - num_heads=num_heads, - head_dim=head_dim, - norm=norm, - output=output, - num_layers=num_layers, - output_hidden_states=output_hidden_states, - ) - - self.shard = shard - - def setup_caches( - self, - batch_size: int, - dtype: torch.dtype, - *, - encoder_max_seq_len: Optional[int] = None, - decoder_max_seq_len: Optional[int] = None, - ): - """ - modified version for shard - - assume just decoder layers - """ - if decoder_max_seq_len is not None: - self.decoder_max_cache_seq_len = decoder_max_seq_len - else: - self.decoder_max_cache_seq_len = self.max_seq_len - - for layer in self.layers: - if layer is not None: - layer.setup_caches( - batch_size, - dtype, - encoder_max_seq_len=self.encoder_max_cache_seq_len, - decoder_max_seq_len=self.decoder_max_cache_seq_len, - ) - - def 
caches_are_enabled(self) -> bool: - """ - modified version for shard - """ - if self.layers[0] is not None: - return self.layers[0].caches_are_enabled() - else: - for layer in self.layers: - if layer is not None: - return layer.caches_are_enabled() - - return False - - def reset_caches(self): - torch.cuda.empty_cache() - - for layer in self.layers: - if layer is not None: - layer.reset_cache() - - def check_maxed_cache(self, tokens: torch.Tensor) -> bool: - """ - Check if cached is maxed out and needs to be reset - """ - active_layers = [x for x in self.layers if x is not None] - kv_cache = active_layers[0].attn.kv_cache - current_pos = kv_cache.cache_pos[0] + tokens.numel() + self.max_seq_len - k_shape = kv_cache.k_cache.shape[2] - - if DEBUG >= 4: - print(f"cache current_pos: {current_pos}\nk_shape: {k_shape}") - - if current_pos <= k_shape: - if DEBUG >= 4: - print("============ MAX CACHE REACHED CLEAR ==============") - - return True - - return False - - def forward( - self, - tokens: torch.Tensor, - *, - mask: Optional[_MaskType] = None, - input_pos: Optional[torch.Tensor] = None, - hidden_state: Optional[torch.Tensor] = None, - dtype: torch.dtype = torch.float16 - ) -> Union[torch.Tensor, List[torch.Tensor]]: - # Determine the type of input and shape - if DEBUG >= 4: - print("forward called") - if tokens is not None: - print(f"tokens [{tokens.shape}]: {tokens}") - print(f"mask: {mask}") - print(f"input_pos: {input_pos}") - - if hidden_state is not None: - print(f"hidden_state [{hidden_state.shape}]: {hidden_state}") - - if hidden_state is not None: - h = hidden_state # Use directly as hidden states - else: - seq_len = tokens.shape[1] - - self._validate_inputs( - seq_len, - mask=mask, - input_pos=input_pos, - ) - - fl_tokens = tokens.clone() - h = self.tok_embeddings(fl_tokens).to(dtype=dtype) # Apply token tok_embeddings - - # Initialize a list to capture hidden states if requested - # for captured hidden states - hidden = [] - curr_layers = [self.layers[i] for i in range(self.shard.start_layer, self.shard.end_layer + 1)] - for i, layer in enumerate(curr_layers): - if DEBUG >= 8: - print(f"\nhidden layer in H[{self.shard.start_layer+i}]\n{h}") - print(f"\nmask\n{mask}\ninput_pos\n{input_pos}") - print(f"\noutput_hidden_states\n{self.output_hidden_states}\n") - - if i in self.output_hidden_states: - hidden.append(h) - - # Process through each transformer layer - # with torch.no_grad(): - h = layer( - h, - mask=mask, - input_pos=input_pos, - ) - - - # if i in self.output_hidden_states: - # hidden.append(h) - - if DEBUG >= 8: - print(f"\nhidden layer out H[{self.shard.start_layer+i}]->H[{self.shard.start_layer+i+1}]\n{h}\n") - - if self.shard.is_last_layer(): - # Apply normalization - h = self.norm(h) - - # Handle chunked output if needed - output = self.output(h).float() - - if DEBUG >= 4: - print(f"\n\noutput {output}\n\n") - - return output - else: - if DEBUG >= 4: - print(f"\n\nhidden output {hidden[-1]}\n\n") - - return hidden[-1] - - -def LlamaModel(config: dict, shard: Shard): - """ - LlamaModel using torchtune - """ - - # rope scaling config - rope = Llama3ScaledRoPE( - dim=config["head_dim"], - max_seq_len=config["max_seq_len"], - base=config["rope_base"], - scale_factor=config["rope_scaling_factor"], - ) - - # hack to align sharded weights with layers - # fill unused layer positions with None - layers = [None for _ in range(shard.n_layers)] - - # build layers - for i in range(shard.start_layer, shard.end_layer + 1): - self_attn = ttm.MultiHeadAttention( - 
embed_dim=config["embed_dim"], - num_heads=config["num_heads"], - num_kv_heads=config["num_kv_heads"], - head_dim=config["head_dim"], - q_proj=nn.Linear( - config["embed_dim"], - config["num_heads"]*config["head_dim"], - bias=config["attn_bias"], - ), - k_proj=nn.Linear( - config["embed_dim"], - config["num_kv_heads"]*config["head_dim"], - bias=config["attn_bias"], - ), - v_proj=nn.Linear( - config["embed_dim"], - config["num_kv_heads"]*config["head_dim"], - bias=config["attn_bias"], - ), - output_proj=nn.Linear( - config["embed_dim"], - config["embed_dim"], - bias=config["attn_bias"], - ), - max_seq_len=config["max_seq_len"], - attn_dropout=config["attn_dropout"], - pos_embeddings=rope, - ) - - mlp = llama3_mlp( - dim=config["embed_dim"], - hidden_dim=config["intermediate_dim"], - ) - - layer = ttm.TransformerSelfAttentionLayer( - attn=self_attn, - mlp=mlp, - sa_norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), - mlp_norm=RMSNorm(config["embed_dim"], eps=config["norm_eps"]), - ) - - layers[i] = layer - - layers = nn.ModuleList(layers) - - tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) - if len(re.findall(r"3\.2", shard.model_id)) > 0: - print("Using TiedLinear") - output_proj = ttm.TiedLinear(tok_embeddings) - else: - output_proj = nn.Linear(config["embed_dim"], config["vocab_size"], bias=False) - - norm = RMSNorm(config["embed_dim"], eps=config["norm_eps"]) - - return ShardTransformerDecoder( - tok_embeddings=tok_embeddings, - shard=shard, - layers=layers, - max_seq_len=config["max_seq_len"], - num_heads=config["num_heads"], - head_dim=config["head_dim"], - norm=norm, - output=output_proj, - num_layers=config["num_layers"], - ) - - -class ShardedLlamaModel(nn.Module): - def __init__( - self, - config: dict, - shard: Shard, - device: Optional[torch.device] = None, - dtype: torch.dtype = torch.float16, - use_cache: Optional[bool] = False, - max_generated_tokens: int = 1024, - ): - super(ShardedLlamaModel, self).__init__() - - self.shard = shard - self.config = config - self.dtype = dtype - self.device = device if device is not None else torch.device("cpu") - self.max_seq_len = self.config["max_seq_len"] - self.use_cache = use_cache - - # pad_id maually set as same in all llama models - self.pad_id = 128004 # from <|finetune_right_pad_id|> - - self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) - - if DEBUG >= 4: - print("ShardedLlamaModel called") - print(f"self.model {self.model}") - - # keep track of current position in generation - self.max_generated_tokens = max_generated_tokens - - def generate( - self, - tokens: Optional[torch.Tensor] = None, - mask: Optional[torch.Tensor] = None, - input_pos: Optional[torch.Tensor] = None, - hidden_state: Optional[torch.Tensor] = None, - curr_pos: Optional[int] = 0 - ) -> Tuple[ - Optional[torch.Tensor], - torch.Tensor, - ]: - """ - Generate logits and/or hidden_states from llama model - - Args - tokens (torch.Tensor) - tokens from prompt tokenization and generation - hidden_state (torch.Tensor, optional) - hidden state from last activated hidden layer, if any - """ - if DEBUG >= 4: - print("generate called") - print(f"tokens: {tokens}") - if mask is not None: - print(f"mask: {mask.size()}") - print(f"input_pos: {input_pos.size()}") - print(f"hidden_state: {hidden_state}") - print(f"curr_pos: {curr_pos}") - print(f"cached? 
{self.model.caches_are_enabled()}") - - model_hs = None - model_logits = None - - self.model.output_hidden_states = [self.shard.end_layer] - - if curr_pos > 0: - if self.model.caches_are_enabled(): - input_pos = input_pos[:, curr_pos].contiguous() - mask = mask[:, curr_pos, None, :].contiguous() - else: - input_pos = input_pos[:, :curr_pos + 1] - mask = mask[:, :curr_pos + 1, :curr_pos + 1] - else: - _, tklng = tokens.size() - - if self.model.caches_are_enabled(): - mask = mask[:, :tklng] - else: - mask = mask[:, :tklng, :tklng] - - input_pos = input_pos[:, :tklng].squeeze() - - if DEBUG >= 4: - print("model_input") - if tokens is not None: - print(f"tokens: {tokens}") - if hidden_state is not None: - print(f"hidden_state: {hidden_state}") - print(f"mask: {mask}") - print(f"input_pos: {input_pos}") - - - model_output = self.model( - tokens=tokens, - mask=mask, - input_pos=input_pos, - hidden_state=hidden_state, - dtype=self.dtype - ) - - if self.shard.is_last_layer(): - model_logits = model_output - else: - model_hs = model_output - - if DEBUG >= 4: - print(f"model_hs\n{model_hs}\nmodel_logits\n{model_logits}") - - return model_hs, model_logits diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index d39c61d8e..fec9415bb 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -5,11 +5,12 @@ import re import json from pathlib import Path -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union, List, Callable import torch import torch.nn as nn -from torchtune.modules import FeedForward +from torchtune.modules.attention_utils import _MaskType +from torchtune.modules import FeedForward, TransformerDecoder from torchtune.models.convert_weights import hf_to_tune from safetensors.torch import load_file as load_safetensors @@ -147,11 +148,10 @@ def load_model_weights_torchtune( if head_dim is None: head_dim = dim // num_heads - - model_state_dict = model.state_dict() - for name, _ in model_state_dict.items(): - print(f"name {name}") + if DEBUG >= 8: + for name, _ in model_state_dict.items(): + print(f"name {name}") # Load weights from safetensors files in the cache directory safetensors_files = list(cache_dir.glob("*.safetensors")) if not safetensors_files: @@ -171,74 +171,91 @@ def load_model_weights_torchtune( # remap to work with our model remapped_state_dict = {} - if "llama" in shard.model_id or "Llama" in shard.model_id: - for key, value in full_state_dict.items(): - # load layer by shard - for layer_num in range(shard.start_layer, shard.end_layer + 1): - # change input layer norm to sa_norm for torchtune - re_iln = re.findall(rf"model.layers\.{layer_num}\.(input_layernorm)\.weight", key) - if len(re_iln) != 0: - new_key = f"model.layers.{layer_num}.sa_norm.scale" + is_llama = True if "llama" in shard.model_id or "Llama" in shard.model_id else False + + if DEBUG >= 8 and is_llama: + print("loading llama type weights") + elif DEBUG >= 8 and not is_llama: + print("loading weights") + + for key, value in full_state_dict.items(): + # load layer by shard + for layer_num in range(shard.start_layer, shard.end_layer + 1): + # change input layer norm to sa_norm for torchtune + re_iln = re.findall(rf"model.layers\.{layer_num}\.(input_layernorm)\.weight", key) + if len(re_iln) != 0: + new_key = f"model.layers.{layer_num}.sa_norm.scale" + remapped_state_dict[new_key] = value + if DEBUG >= 8: + print(f"{key} == {new_key}") + + # change post attention layernorm to mlp_norm for torchtune 
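# Illustrative sketch (not taken from this patch): a minimal standalone version of
# the HF -> torchtune key remapping that this hunk generalizes. The rename rules
# below mirror the ones applied in load_model_weights_torchtune; the toy keys at
# the bottom are made up purely for illustration. The real loader also permutes
# q_proj/k_proj weights for llama models (see _permute); that step is skipped here.
import re

def remap_hf_key(key: str) -> str:
  """Map a Hugging Face checkpoint key to the torchtune-style name used here."""
  key = re.sub(r"\.input_layernorm\.weight$", ".sa_norm.scale", key)
  key = re.sub(r"\.post_attention_layernorm\.weight$", ".mlp_norm.scale", key)
  key = key.replace(".self_attn.o_proj.", ".attn.output_proj.")
  key = key.replace(".self_attn.", ".attn.")
  key = key.replace(".mlp.gate_proj.", ".mlp.w1.")
  key = key.replace(".mlp.down_proj.", ".mlp.w2.")
  key = key.replace(".mlp.up_proj.", ".mlp.w3.")
  if key == "model.embed_tokens.weight":
    return "model.tok_embeddings.weight"
  if key == "model.norm.weight":
    return "model.norm.scale"
  if key == "lm_head.weight":
    return "model.output.weight"
  return key

# toy keys, illustration only
for k in [
  "model.layers.0.input_layernorm.weight",
  "model.layers.0.self_attn.o_proj.weight",
  "model.layers.0.mlp.up_proj.weight",
  "lm_head.weight",
]:
  print(f"{k} -> {remap_hf_key(k)}")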
+ re_pal = re.findall(rf"model.layers\.{layer_num}\.(post_attention_layernorm)\.weight", key) + if len(re_pal) != 0: + new_key = f"model.layers.{layer_num}.mlp_norm.scale" + remapped_state_dict[new_key] = value + if DEBUG >= 8: + print(f"{key} == {new_key}") + + # change self_attn to attn + # along with changing o_proj to output_proj + re_attn = re.findall(rf"model\.layers\.{layer_num}.(\w+)\.(\w+)\.(\w+)", key) + if len(re_attn) != 0 and re_attn[0][0] == "self_attn": + if re_attn[0][1] == "k_proj" and is_llama: + value = _permute( + t=value, + n_heads=num_kv_heads, + head_dim=head_dim, + dim=dim + ) + + new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" + remapped_state_dict[new_key] = value + elif re_attn[0][1] == "q_proj" and is_llama: + value = _permute( + t=value, + n_heads=num_heads, + head_dim=head_dim, + dim=dim + ) + new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" remapped_state_dict[new_key] = value - if DEBUG >= 8: - print(f"{key} == {new_key}") - # change post attention layernorm to mlp_norm for torchtune - re_pal = re.findall(rf"model.layers\.{layer_num}\.(post_attention_layernorm)\.weight", key) - if len(re_pal) != 0: - new_key = f"model.layers.{layer_num}.mlp_norm.scale" + elif re_attn[0][1] == "o_proj": + new_key = f"model.layers.{layer_num}.attn.output_proj.weight" remapped_state_dict[new_key] = value - if DEBUG >= 8: - print(f"{key} == {new_key}") - - # change self_attn to attn - # along with changing o_proj to output_proj - re_attn = re.findall(rf"model\.layers\.{layer_num}.(\w+)\.(\w+)\.(\w+)", key) - if len(re_attn) != 0 and re_attn[0][0] == "self_attn": - if re_attn[0][1] == "k_proj": - value = _permute( - t=value, - n_heads=num_kv_heads, - head_dim=head_dim, - dim=dim - ) - - new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" - remapped_state_dict[new_key] = value - elif re_attn[0][1] == "q_proj": - value = _permute( - t=value, - n_heads=num_heads, - head_dim=head_dim, - dim=dim - ) - new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" - remapped_state_dict[new_key] = value - - elif re_attn[0][1] == "o_proj": - new_key = f"model.layers.{layer_num}.attn.output_proj.weight" - remapped_state_dict[new_key] = value - else: - new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" - remapped_state_dict[new_key] = value - if DEBUG >= 8: - print(f"{key} == {new_key}") - - # set mlp weights - re_mlp = re.findall(rf"model\.layers\.{layer_num}.mlp.(\w+)\.(\w+)", key) - if len(re_mlp) != 0: - proj_name = re_mlp[0][0] - if proj_name == "up_proj": - proj_name = "w3" - elif proj_name == "down_proj": - proj_name = "w2" - elif proj_name == "gate_proj": - proj_name = "w1" - new_key = f"model.layers.{layer_num}.mlp.{proj_name}.weight" + else: + new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" remapped_state_dict[new_key] = value - if DEBUG >= 8: - print(f"{key} == {new_key}") + if DEBUG >= 8: + print(f"{key} == {new_key}") + + # set mlp weights + re_mlp = re.findall(rf"model\.layers\.{layer_num}.mlp.(\w+)\.(\w+)", key) + if len(re_mlp) != 0: + proj_name = re_mlp[0][0] + if proj_name == "up_proj": + proj_name = "w3" + elif proj_name == "down_proj": + proj_name = "w2" + elif proj_name == "gate_proj": + proj_name = "w1" + new_key = f"model.layers.{layer_num}.mlp.{proj_name}.weight" + remapped_state_dict[new_key] = value + if DEBUG >= 8: + print(f"{key} == {new_key}") + # saving embed for paired weights + if key == "model.embed_tokens.weight": + 
remapped_state_dict["model.tok_embeddings.weight"] = value + if DEBUG >= 8: + print("model.embed_tokens.weight == model.tok_embeddings.weight") + + if key == "model.norm.weight": + remapped_state_dict["model.norm.scale"] = value + + if key == "lm_head.weight": + remapped_state_dict["model.output.weight"] = value # saving embed for paired weights if key == "model.embed_tokens.weight": remapped_state_dict["model.tok_embeddings.weight"] = value @@ -251,9 +268,6 @@ def load_model_weights_torchtune( if key == "lm_head.weight": remapped_state_dict["model.output.weight"] = value - else: - print(f"{shard.model_id} not supported for sharding, loading weights normally") - if not remapped_state_dict: model.load_state_dict(full_state_dict, strict=True) else: @@ -265,10 +279,164 @@ def load_model_weights_torchtune( # load new weight map model.load_state_dict(remapped_state_dict, strict=False) - # if DEBUG >= 8: - print("\n--- checking weights ----\n") - check_weights(model, remapped_state_dict) + if DEBUG >= 8: + print("\n--- checking weights ----\n") + check_weights(model, remapped_state_dict) + +class ShardTransformerDecoder(TransformerDecoder): + """ + ShardTransformerDecorder + Custom version of torchtune TransformerDecoder to allow for + sharding of models and passing of hidden layers between shards + """ + def __init__( + self, + *, + shard: Shard, + tok_embeddings: nn.Embedding, + layers: Union[nn.Module, List[nn.Module], nn.ModuleList], + max_seq_len: int, + num_heads: int, + head_dim: int, + norm: nn.Module, + output: Union[nn.Linear, Callable], + num_layers: Optional[int] = None, + output_hidden_states: Optional[List[int]] = None, + ): + super().__init__( + tok_embeddings=tok_embeddings, + layers=layers, + max_seq_len=max_seq_len, + num_heads=num_heads, + head_dim=head_dim, + norm=norm, + output=output, + num_layers=num_layers, + output_hidden_states=output_hidden_states, + ) + + self.shard = shard + + def setup_caches( + self, + batch_size: int, + dtype: torch.dtype, + *, + encoder_max_seq_len: Optional[int] = None, + decoder_max_seq_len: Optional[int] = None, + ): + """ + modified version for shard + + assume just decoder layers + """ + if decoder_max_seq_len is not None: + self.decoder_max_cache_seq_len = decoder_max_seq_len + else: + self.decoder_max_cache_seq_len = self.max_seq_len + + for layer in self.layers: + if layer is not None: + layer.setup_caches( + batch_size, + dtype, + encoder_max_seq_len=self.encoder_max_cache_seq_len, + decoder_max_seq_len=self.decoder_max_cache_seq_len, + ) + + def caches_are_enabled(self) -> bool: + """ + modified version for shard + """ + if self.layers[0] is not None: + return self.layers[0].caches_are_enabled() + else: + for layer in self.layers: + if layer is not None: + return layer.caches_are_enabled() + + return False + + def reset_caches(self): + torch.cuda.empty_cache() + + for layer in self.layers: + if layer is not None: + layer.reset_cache() + + def forward( + self, + tokens: torch.Tensor, + *, + mask: Optional[_MaskType] = None, + input_pos: Optional[torch.Tensor] = None, + hidden_state: Optional[torch.Tensor] = None, + dtype: torch.dtype = torch.float16 + ) -> Union[torch.Tensor, List[torch.Tensor]]: + # Determine the type of input and shape + if DEBUG >= 4: + print("forward called") + if tokens is not None: + print(f"tokens [{tokens.shape}]: {tokens}") + print(f"mask: {mask}") + print(f"input_pos: {input_pos}") + + if hidden_state is not None: + print(f"hidden_state [{hidden_state.shape}]: {hidden_state}") + + if hidden_state is not None: + 
h = hidden_state # Use directly as hidden states + else: + seq_len = tokens.shape[1] + + self._validate_inputs( + seq_len, + mask=mask, + input_pos=input_pos, + ) + + fl_tokens = tokens.clone() + h = self.tok_embeddings(fl_tokens).to(dtype=dtype) # Apply token tok_embeddings + + # Initialize a list to capture hidden states if requested + # for captured hidden states + hidden = [] + curr_layers = [self.layers[i] for i in range(self.shard.start_layer, self.shard.end_layer + 1)] + for i, layer in enumerate(curr_layers): + if DEBUG >= 8: + print(f"\nhidden layer in H[{self.shard.start_layer+i}]\n{h}") + print(f"\nmask\n{mask}\ninput_pos\n{input_pos}") + print(f"\noutput_hidden_states\n{self.output_hidden_states}\n") + + if i in self.output_hidden_states: + hidden.append(h) + + # Process through each transformer layer + h = layer( + h, + mask=mask, + input_pos=input_pos, + ) + + if DEBUG >= 8: + print(f"\nhidden layer out H[{self.shard.start_layer+i}]->H[{self.shard.start_layer+i+1}]\n{h}\n") + + if self.shard.is_last_layer(): + # Apply normalization + h = self.norm(h) + + # Handle chunked output if needed + output = self.output(h).float() + + if DEBUG >= 4: + print(f"\n\noutput {output}\n\n") + + return output + else: + if DEBUG >= 4: + print(f"\n\nhidden output {hidden[-1]}\n\n") + return hidden[-1] class MultiLayerPreceptron(nn.Module): def __init__(self, input_dim, hidden_dim, activation="silu", use_bias=False): @@ -300,36 +468,6 @@ def __init__(self, input_dim, hidden_dim, activation="silu", use_bias=False): def forward(self, x) -> torch.Tensor: return self.down_proj(self.act_fn(self.gate_proj(x))*self.up_proj(x)) - -class RMSNorm(nn.Module): - def __init__(self, hidden_size, eps=1e-6): - """ - RMSNorm - designed for llama model but used for other models - """ - super().__init__() - self.weight = nn.Parameter(torch.ones(hidden_size)) - self.eps = eps - - def forward(self, hidden_states): - input_dtype = hidden_states.dtype - hidden_states = hidden_states.to(torch.float32) - variance = hidden_states.pow(2).mean(-1, keepdim=True) - hidden_states = hidden_states*torch.rsqrt(variance + self.eps) - return self.weight*hidden_states.to(input_dtype) - - -def llama3_mlp(dim: int, hidden_dim: int) -> FeedForward: - """ - Build the MLP layer associated with the Llama model. - """ - gate_proj = nn.Linear(dim, hidden_dim, bias=False) - down_proj = nn.Linear(hidden_dim, dim, bias=False) - up_proj = nn.Linear(dim, hidden_dim, bias=False) - - return FeedForward(gate_proj=gate_proj, down_proj=down_proj, up_proj=up_proj) - - class ShardInferenceState: def __init__( self, @@ -368,4 +506,42 @@ def __str__(self) -> str: input_pos: {self.input_pos} mask: {self.mask} curr_pos: {self.curr_pos} - """ \ No newline at end of file + """ + +def layer_mlp(dim: int, hidden_dim: int) -> FeedForward: + """ + Generalized MLP layer + Ref: https://github.com/pytorch/torchtune/blob/main/torchtune/models/llama3_1/_component_builders.py#L124 + Ref: https://github.com/pytorch/torchtune/blob/main/torchtune/models/qwen2/_component_builders.py#L127C1-L134C82 + """ + gate_proj = nn.Linear(dim, hidden_dim, bias=False) + down_proj = nn.Linear(hidden_dim, dim, bias=False) + up_proj = nn.Linear(dim, hidden_dim, bias=False) + return FeedForward(gate_proj=gate_proj, down_proj=down_proj, up_proj=up_proj) + +""" +Llama utils +""" +def llama3_mlp(dim: int, hidden_dim: int) -> FeedForward: + """ + Build the MLP layer associated with the Llama model. 
+ Ref: https://github.com/pytorch/torchtune/blob/main/torchtune/models/llama3_1/_component_builders.py#L124 + """ + gate_proj = nn.Linear(dim, hidden_dim, bias=False) + down_proj = nn.Linear(hidden_dim, dim, bias=False) + up_proj = nn.Linear(dim, hidden_dim, bias=False) + + return FeedForward(gate_proj=gate_proj, down_proj=down_proj, up_proj=up_proj) + +""" +Qwen utils +""" +def qwen2_mlp(dim: int, hidden_dim: int) -> FeedForward: + """ + Build the MLP layer associated with the Qwen2 model. + Ref: https://github.com/pytorch/torchtune/blob/main/torchtune/models/qwen2/_component_builders.py#L127C1-L134C82 + """ + gate_proj = nn.Linear(dim, hidden_dim, bias=False) + down_proj = nn.Linear(hidden_dim, dim, bias=False) + up_proj = nn.Linear(dim, hidden_dim, bias=False) + return FeedForward(gate_proj=gate_proj, down_proj=down_proj, up_proj=up_proj) \ No newline at end of file diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 96acea665..2c24aaa44 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -27,8 +27,7 @@ ShardInferenceState ) -# supported models -from exo.inference.torch.models.llama3 import ShardedLlamaModel +from exo.inference.torch.models.general_mha import ShardedGeneralModel # from torchtune generate recipe # https://github.com/pytorch/torchtune/blob/main/recipes/configs/generation.yaml#L40 @@ -399,7 +398,7 @@ def start_model(): if DEBUG >= 4: print("start_model called") - self.sharded_model = ShardedLlamaModel( + self.sharded_model = ShardedGeneralModel( config=self.model_config, shard=shard, device=self.device, diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index 0ba5ed384..ff1b62327 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -13,7 +13,7 @@ from transformers import AutoTokenizer -from exo.inference.torch.models.llama3 import ShardedLlamaModel +from exo.inference.torch.models.general_mha import ShardedGeneralModel from exo.inference.shard import Shard from exo.inference.torch.models.llm_utils import ( @@ -39,23 +39,11 @@ def main(model, prompt: str, device: torch.device = torch.device("cpu"), dtype: "content": prompt, }] - text = llama_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) - tok_out = llama_tokenizer([text], return_tensors="pt") + text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + tok_out = tokenizer([text], return_tensors="pt") print(f"tok_out: {tok_out}") tokens = tok_out.input_ids.to(device=device, dtype=torch.int) - # messages = [] - # messages.extend([ - # Message(role="system", content="You are a helpful and creative AI assistant."), - # Message(role="user", content=prompt), - # # Empty assistant message to kick-start generation - # Message(role="assistant", content=""), - # ]) - - # tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) - # tokens = torch.tensor([tokenizer_out["tokens"]], dtype=torch.int, device=device) - - rng = torch.Generator(device=device) rng.manual_seed(RAND_SEED) @@ -85,12 +73,12 @@ def main(model, prompt: str, device: torch.device = torch.device("cpu"), dtype: # masking for proper attention # select correct pad_id - if hasattr(llama_tokenizer, "pad_id"): - pad_id = llama_tokenizer.pad_id - elif hasattr(llama_tokenizer, "pad_token_id"): - print(f"pad_token_id: 
{llama_tokenizer.pad_token_id}") - if llama_tokenizer.pad_token_id is not None: - pad_id = llama_tokenizer.pad_token_id + if hasattr(tokenizer, "pad_id"): + pad_id = tokenizer.pad_id + elif hasattr(tokenizer, "pad_token_id"): + print(f"pad_token_id: {tokenizer.pad_token_id}") + if tokenizer.pad_token_id is not None: + pad_id = tokenizer.pad_token_id else: pad_id = 0 else: @@ -122,17 +110,12 @@ def main(model, prompt: str, device: torch.device = torch.device("cpu"), dtype: print(f"init mask: {mask}") print(f"init input_pos: {input_pos}") - if model.model.caches_are_enabled(): - curr_mask = mask[:, :tokens_length] - else: - curr_mask = mask[:, :tokens_length, :tokens_length] - curr_pos = 0 _, logits = model.generate( tokens=tokens, - mask=curr_mask, - input_pos=input_pos[:, :tokens_length].squeeze(), + mask=mask, + input_pos=input_pos, curr_pos=curr_pos ) @@ -155,8 +138,7 @@ def main(model, prompt: str, device: torch.device = torch.device("cpu"), dtype: for i in range(MAX_NEW_TOKENS - 1): print(f"gen #{i+1}") - if tokens.item() == llama_tokenizer.eos_token_id: - # if tokens.item() in llama_tokenizer.stop_tokens: + if tokens.item() == tokenizer.eos_token_id: print("stop token hit!") break @@ -192,51 +174,7 @@ def main(model, prompt: str, device: torch.device = torch.device("cpu"), dtype: if not model.model.caches_are_enabled(): tokens = generated_tokens.clone() - print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens.tolist()[0])}\n\n\n") - - -def normal_full(model, user_prompt: str, device: torch.device = torch.device("cpu")): - # Tokenize input text - messages = [] - messages.extend([ - Message(role="system", content="You are a helpful and creative AI assistant."), - Message(role="user", content=user_prompt), - # Empty assistant message to kick-start generation - Message(role="assistant", content=""), - ]) - - tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) - prompt = torch.tensor(tokenizer_out["tokens"], dtype=torch.int, device=device) - print(f"tokens prompt: {prompt}") - print(f"pad_id: {llama_tokenizer.pad_id}") - # messages = [{ - # "role": "assistant", - # "content": "", - # }, { - # "role": "user", - # "content": prompt, - # }] - - # text = llama_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) - # tok_out = llama_tokenizer([text], return_tensors="pt") - # prompt_tok = tok_out.input_ids.to(device=device) - # print(f"tokens prompt: {prompt_tok}") - - generated_tokens, _ = ttg.generate( - model=model.model, - prompt=prompt, - max_generated_tokens=MAX_NEW_TOKENS, - pad_id=llama_tokenizer.pad_id, - temperature=TEMP, - top_k=TOP_K, - stop_tokens=llama_tokenizer.stop_tokens, - ) - - generated_tokens = generated_tokens[:, -MAX_NEW_TOKENS:].tolist() - - print(f"generated_tokens: {generated_tokens}") - - print(f"\n\n[resp from model]\n\n{llama_tokenizer.decode(generated_tokens[0])}\n\n\n") + print(f"\n\n[resp from model]\n\n{tokenizer.decode(generated_tokens.tolist()[0])}\n\n\n") if __name__ == "__main__": @@ -265,15 +203,13 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp ) # Initialize tokenizer - # llama_tokenizer_path = f"{cache_dir}/original/tokenizer.model" - # llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) - llama_tokenizer = AutoTokenizer.from_pretrained(cache_dir) + tokenizer = AutoTokenizer.from_pretrained(cache_dir) # Initialize LlamaModel with config and tokenizer - device = torch.device("cuda") + # device = torch.device("cuda") dtype = 
torch.bfloat16 - # device = torch.device("cpu") - shard_model_1 = ShardedLlamaModel( + device = torch.device("cpu") + shard_model_1 = ShardedGeneralModel( config=config, shard=shard_1, device=device, @@ -296,6 +232,4 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp ) import time - time.sleep(5) - # main(shard_model_1, prompt, device, config["torch_dtype"]) - # normal_full(shard_model_1, prompt, device) + main(shard_model_1, prompt, device, config["torch_dtype"]) diff --git a/exo/inference/torch/tests/test_llama3_split.py b/exo/inference/torch/tests/test_llama3_split.py deleted file mode 100644 index d6fe12785..000000000 --- a/exo/inference/torch/tests/test_llama3_split.py +++ /dev/null @@ -1,154 +0,0 @@ -""" -Test of pytorch based llama3 model -""" - -from pathlib import Path -import torch -from huggingface_hub import snapshot_download - -import torchtune.generation as ttg -from torchtune.models import llama3 -from torchtune.data import Message - - -from exo.inference.torch.models.llama3 import ShardedLlamaModel -from exo.inference.shard import Shard - -from exo.inference.torch.models.llm_utils import ( - load_model_config, - load_model_weights_torchtune, -) - - -MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" -TEMP = 0.6 -TOP_K = 35 -MAX_NEW_TOKENS=10 - - -def test_generation_1(shard_model, tokens): - """ - Test the generation capabilities of the LlamaModel with sample text. - """ - - hidden_states, _ = shard_model.generate(tokens) - - if hidden_states is not None: - print(f"hidden_states[{len(hidden_states)}]: {hidden_states}") - - return hidden_states - -def test_generation_2(shard_model, hidden_state): - print("Generate with the rest of layers") - print(f"in hidden_states {hidden_state.shape}: {hidden_state}") - - _, logits = shard_model.generate( - hidden_state=hidden_state - ) - - if logits is not None: - print(f"logits: {logits.shape}\n{logits}") - - # rand_sample = torch.empty(( - # logits.size(0), - # shard_model.model.tok_embeddings.num_embeddings - # ), - # device=logits.device - # ).exponential_(1, generator=None) - - tokens = ttg.sample( - logits=logits[:, -1].clone(), - temperature=TEMP, - top_k=TOP_K, - # q=rand_sample - ) - - print(f"tokens: {tokens}") - - return tokens - -if __name__ == "__main__": - print("\nTesting generation:") - - # prompt = "In a single word only, what is the last name of the current president of the USA?" - prompt = "In a single word only, what is the capital of france?" 
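# Illustrative sketch (not taken from this patch): the split flow that this deleted
# test exercised, expressed against the new ShardedGeneralModel.generate, which
# returns a (hidden_state, logits) pair. Only the last shard yields logits; earlier
# shards hand their hidden state to the next node. `send_to_next_shard` is a
# hypothetical callback standing in for whatever transport the node actually uses.
import torchtune.generation as ttg

def shard_step(shard_model, tokens, mask, input_pos, send_to_next_shard, temp=0.85, top_k=35):
  """One decode step on a single shard, assuming inputs prepared as in the tests above."""
  hidden, logits = shard_model.generate(tokens=tokens, mask=mask, input_pos=input_pos)
  if logits is not None:
    # last shard in the pipeline: sample the next token from the final position
    return ttg.sample(logits=logits[:, -1].clone(), temperature=temp, top_k=top_k)
  # intermediate shard: forward the hidden state instead of sampling
  return send_to_next_shard(hidden)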
- - # Get the path to the model files from the Hugging Face cache - cache_dir = Path(snapshot_download(MODEL_NAME)) - - # Load model configuration - config = load_model_config(cache_dir / "config.json") - - # Setup shard - n_layers = int(config["num_layers"]) - s1_end = int(n_layers / 2) - shard_1 = Shard(model_id=MODEL_NAME, start_layer=0, end_layer=s1_end, n_layers=n_layers) - - shard_2 = Shard(model_id=MODEL_NAME, start_layer=s1_end + 1, end_layer=n_layers - 1, n_layers=n_layers) - - # Initialize tokenizer - llama_tokenizer_path = f"{cache_dir}/original/tokenizer.model" - llama_tokenizer = llama3.llama3_tokenizer(path=llama_tokenizer_path) - - # Initialize LlamaModel with config and tokenizer - device = torch.device("cuda") - shard_model_1 = ShardedLlamaModel( - config=config, - shard=shard_1, - device=device, - use_cache=False - ) - - load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) - - shard_model_2 = ShardedLlamaModel( - config=config, - shard=shard_2, - device=device, - use_cache=False - ) - - load_model_weights_torchtune(cache_dir, shard_2, shard_model_2) - - # Tokenize input text - messages = [] - messages.extend([ - Message(role="system", content="You are a helpful and creative AI assistant."), - Message(role="user", content=prompt), - # Empty assistant message to kick-start generation - Message(role="assistant", content=""), - ]) - - tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) - print(f"tokenizer_out: {tokenizer_out}") - tokens = torch.tensor([tokenizer_out["tokens"]], dtype=torch.int).to(device=device) - - generated_tokens = tokens.clone().to(device=device) - - for i in range(MAX_NEW_TOKENS): - print(f"--------- gen #{i} ----------") - print(f"\n------------ {shard_1.start_layer} - {shard_1.end_layer} ----------\n") - - shard_1_hs = test_generation_1( - shard_model=shard_model_1, - tokens=tokens - ) - - print(f"\n out shard_1_hs {shard_1_hs}") - - print(f"\n------------ {shard_2.start_layer} - {shard_2.end_layer} ----------\n") - - tg2_token = test_generation_2(shard_model_2, shard_1_hs) - - if (tg2_token in llama_tokenizer.stop_tokens - or tg2_token == llama_tokenizer.eos_id): - print("hit stop token") - break - - generated_tokens = torch.cat([generated_tokens, tg2_token], dim=-1) - print(f"\ngenerated_tokens: {generated_tokens}") - - tokens = generated_tokens.clone() - -print("\n\n[resp from model]\n\n") -print(f"{llama_tokenizer.decode(generated_tokens.tolist()[0])}") diff --git a/exo/inference/torch/tests/test_qwen_full.py b/exo/inference/torch/tests/test_qwen_full.py new file mode 100644 index 000000000..93b320718 --- /dev/null +++ b/exo/inference/torch/tests/test_qwen_full.py @@ -0,0 +1,246 @@ +""" +Test of pytorch based llama3 models +full layer run +""" + +from pathlib import Path +import torch +from huggingface_hub import snapshot_download + +import torchtune.generation as ttg +from torchtune.models import llama3 +from torchtune.data import Message + +from transformers import AutoTokenizer + +from exo.inference.torch.models.qwen2 import ShardedQwenModel +from exo.inference.torch.models.general_mha import ShardedGeneralModel +from exo.inference.shard import Shard + +from exo.inference.torch.models.llm_utils import ( + load_model_config, + load_model_weights_torchtune +) + +MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct" +TEMP = 0.85 +TOP_K = 35 +MAX_NEW_TOKENS = 200 +RAND_SEED = 42 + + +def main( + model, + prompt: str, + device: torch.device = torch.device("cpu"), + dtype: torch.dtype = torch.bfloat16 +): + messages = [{ + "role": 
"assistant", + "content": "", + }, { + "role": "user", + "content": prompt, + }] + + text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + tok_out = tokenizer([text], return_tensors="pt") + print(f"tok_out: {tok_out}") + tokens = tok_out.input_ids.to(device=device, dtype=torch.int) + + rng = torch.Generator(device=device) + rng.manual_seed(RAND_SEED) + + generated_tokens = tokens.clone() + + print(f"tokens: {tokens}") + + bsz, tokens_length = tokens.size() + + # using self.max_seq_len will take up alot of VRAM + total_response_length = tokens_length + MAX_NEW_TOKENS + + # setup cache + if not model.model.caches_are_enabled(): + with device: + model.model.setup_caches( + bsz, + dtype, + decoder_max_seq_len=total_response_length + ) + + if not model.model.caches_are_enabled(): + max_seq_len = total_response_length + else: + max_seq_len = model.model.decoder_max_cache_seq_len + + # masking for proper attention + + # select correct pad_id + if hasattr(tokenizer, "pad_id"): + pad_id = tokenizer.pad_id + elif hasattr(tokenizer, "pad_token_id"): + print(f"pad_token_id: {tokenizer.pad_token_id}") + if tokenizer.pad_token_id is not None: + pad_id = tokenizer.pad_token_id + else: + pad_id = 0 + else: + pad_id = 0 + + print(f"pad_id: {pad_id}") + + padding_masks = tokens != pad_id + if not padding_masks.all(): + padding_masks = torch.nn.functional.pad( + padding_masks, + (0, MAX_NEW_TOKENS), + value=True, + ) + + mask = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=max_seq_len) + + input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) + else: + mask = torch.tril(torch.ones( + total_response_length, + max_seq_len, + dtype=torch.bool, + device=device, + )).unsqueeze(0) + + input_pos = torch.arange(0, total_response_length, device=device).unsqueeze(0) + + print(f"init mask: {mask}") + print(f"init input_pos: {input_pos}") + + curr_pos = 0 + + _, logits = model.generate( + tokens=tokens, + mask=mask, + input_pos=input_pos, + curr_pos=curr_pos + ) + + curr_pos = tokens_length + + q = torch.empty(( + logits.size(0), + model.model.tok_embeddings.num_embeddings + ), device=logits.device).exponential_(1, generator=rng) + + tokens = ttg.sample( + logits=logits[:, -1].clone(), + temperature=TEMP, + top_k=TOP_K, + q=q + ) + + print(f"tokens: {tokens}") + + for i in range(MAX_NEW_TOKENS - 1): + print(f"gen #{i+1}") + + if tokens.item() == tokenizer.eos_token_id: + print("stop token hit!") + break + + tokens = tokens.view(1, -1).to(device=device) if tokens.ndim == 1 else tokens + + _, logits = model.generate( + tokens=tokens, + input_pos=input_pos, + mask=mask, + curr_pos=curr_pos + ) + + curr_pos += 1 + + q = torch.empty( + ( + logits.size(0), + model.model.tok_embeddings.num_embeddings + ), device=logits.device).exponential_(1, generator=rng) + + tokens = ttg.sample( + logits=logits[:, -1].clone(), + temperature=TEMP, + top_k=TOP_K, + q=q, + ) + + print(f"tokens: {tokens}") + + generated_tokens = torch.cat([generated_tokens, tokens], dim=-1) + print(f"generated_tokens: {generated_tokens}") + + if not model.model.caches_are_enabled(): + tokens = generated_tokens.clone() + + print(f"\n\n[resp from model]\n\n{tokenizer.decode(generated_tokens.tolist()[0])}\n\n\n") + + +if __name__ == "__main__": + # prompt = "Hello, how are you?" + prompt = "Tell me a joke." + # prompt = "What is the meaning of exo?" + # prompt = "Tell me a short 4 line haiku" + # prompt = "In a single word only, what is the last name of the current president of the USA?" 
+ + # Get the path to the model files from the Hugging Face cache + cache_dir = Path(snapshot_download(MODEL_NAME)) + print(f"Cache directory: {cache_dir}") + + # Load model configuration + config = load_model_config(cache_dir/"config.json") + + print(f"current config\n{config}") + + # Setup shard + n_layers = int(config["num_layers"]) + shard_1 = Shard( + model_id=MODEL_NAME, + start_layer=0, + end_layer=n_layers - 1, + n_layers=n_layers, + ) + + # Initialize tokenizer + tokenizer = AutoTokenizer.from_pretrained(cache_dir) + + # Initialize LlamaModel with config and tokenizer +# device = torch.device("cuda") + dtype = torch.bfloat16 + device = torch.device("cpu") + # shard_model_1 = ShardedQwenModel( + # config=config, + # shard=shard_1, + # device=device, + # dtype=config["torch_dtype"], + # use_cache=True, + # max_generated_tokens=MAX_NEW_TOKENS, + # ) + shard_model_1 = ShardedGeneralModel( + config=config, + shard=shard_1, + device=device, + dtype=config["torch_dtype"], + use_cache=True, + max_generated_tokens=MAX_NEW_TOKENS, + ) + + print(f"\nshard_model_1: {shard_model_1}") + + # load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) + load_model_weights_torchtune( + cache_dir=cache_dir, + shard=shard_1, + model=shard_model_1, + num_heads=config["num_heads"], + num_kv_heads=config["num_kv_heads"], + dim=config["embed_dim"], + head_dim=config["head_dim"] + ) + + main(shard_model_1, prompt, device, config["torch_dtype"]) From 1431d489a568b5b003b5529fa3e9aaff2a5d871f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 30 Jan 2025 21:49:47 -0900 Subject: [PATCH 583/589] removing duplicate mlp for single layer_mlp --- exo/inference/torch/models/llm_utils.py | 27 ------------------------- 1 file changed, 27 deletions(-) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index fec9415bb..0d3212c16 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -517,31 +517,4 @@ def layer_mlp(dim: int, hidden_dim: int) -> FeedForward: gate_proj = nn.Linear(dim, hidden_dim, bias=False) down_proj = nn.Linear(hidden_dim, dim, bias=False) up_proj = nn.Linear(dim, hidden_dim, bias=False) - return FeedForward(gate_proj=gate_proj, down_proj=down_proj, up_proj=up_proj) - -""" -Llama utils -""" -def llama3_mlp(dim: int, hidden_dim: int) -> FeedForward: - """ - Build the MLP layer associated with the Llama model. - Ref: https://github.com/pytorch/torchtune/blob/main/torchtune/models/llama3_1/_component_builders.py#L124 - """ - gate_proj = nn.Linear(dim, hidden_dim, bias=False) - down_proj = nn.Linear(hidden_dim, dim, bias=False) - up_proj = nn.Linear(dim, hidden_dim, bias=False) - - return FeedForward(gate_proj=gate_proj, down_proj=down_proj, up_proj=up_proj) - -""" -Qwen utils -""" -def qwen2_mlp(dim: int, hidden_dim: int) -> FeedForward: - """ - Build the MLP layer associated with the Qwen2 model. 
- Ref: https://github.com/pytorch/torchtune/blob/main/torchtune/models/qwen2/_component_builders.py#L127C1-L134C82 - """ - gate_proj = nn.Linear(dim, hidden_dim, bias=False) - down_proj = nn.Linear(hidden_dim, dim, bias=False) - up_proj = nn.Linear(dim, hidden_dim, bias=False) return FeedForward(gate_proj=gate_proj, down_proj=down_proj, up_proj=up_proj) \ No newline at end of file From 7ad4b1c9c5385626d130670b93437a3ec900d472 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 30 Jan 2025 22:17:09 -0900 Subject: [PATCH 584/589] fixes to general mha for detecting to use_tied or not --- exo/inference/torch/models/general_mha.py | 15 +++++++++------ exo/models.py | 6 +++++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/exo/inference/torch/models/general_mha.py b/exo/inference/torch/models/general_mha.py index 43616a3f8..f3589c345 100644 --- a/exo/inference/torch/models/general_mha.py +++ b/exo/inference/torch/models/general_mha.py @@ -40,7 +40,7 @@ def GeneralMHA( ) # tied needed for 3.2 llama models - if len(re.findall(r"3\.2", shard.model_id)) > 0: + if "3.2" in shard.model_id: use_tied = True elif "qwen" in shard.model_id or "Qwen" in shard.model_id: # rope scaling config @@ -53,7 +53,7 @@ def GeneralMHA( output_bias = False # tied needed for 0.5B qwen models - if len(re.findall(r"0\.5B", shard.model_id)) > 0: + if "0.5B" in shard.model_id or "0.5b" in shard.model_id: use_tied = True else: rope = RotaryPositionalEmbeddings( @@ -61,10 +61,13 @@ def GeneralMHA( max_seq_len=config["max_seq_len"], base=config["rope_base"] ) - - print(f"rope: {rope}") - print(f"attn_bias: {attn_bias}") - print(f"output_bias: {output_bias}") + + if DEBUG >= 4: + print(f"model_id: {shard.model_id}") + print(f"rope: {rope}") + print(f"attn_bias: {attn_bias}") + print(f"output_bias: {output_bias}") + print(f"use_tied: {use_tied}") # hack to align sharded weights with layers # fill unused layer positions with None diff --git a/exo/models.py b/exo/models.py index b3da1411a..c1a45c229 100644 --- a/exo/models.py +++ b/exo/models.py @@ -143,7 +143,10 @@ ### qwen "qwen-2.5-0.5b": { "layers": 28, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-0.5B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-0.5B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-0.5B-Instruct" + }, }, "qwen-2.5-1.5b": { "layers": 28, @@ -256,6 +259,7 @@ "deepseek-v3": "Deepseek V3", "deepseek-r1": "Deepseek R1", "llava-1.5-7b-hf": "LLaVa 1.5 7B (Vision Model)", + "qwen-2.5-0.5b": "Qwen 2.5 0.5B", "qwen-2.5-1.5b": "Qwen 2.5 1.5B", "qwen-2.5-coder-1.5b": "Qwen 2.5 Coder 1.5B", "qwen-2.5-3b": "Qwen 2.5 3B", From 76e141ac87e94bb28c6d40345837ca6620837d63 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 30 Jan 2025 23:04:51 -0900 Subject: [PATCH 585/589] adding new shard download method, adding all llama models for torch --- exo/inference/torch/sharded_inference_engine.py | 4 ++-- exo/models.py | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 2c24aaa44..a21e2de77 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -17,7 +17,7 @@ from transformers import AutoTokenizer from exo.inference.inference_engine import InferenceEngine -from exo.download.hf.hf_shard_download import HFShardDownloader +from exo.download.shard_download import ShardDownloader from 
exo.inference.shard import Shard from exo.inference.tokenizers import _resolve_tokenizer from exo.helpers import DEBUG @@ -38,7 +38,7 @@ class TorchDynamicShardInferenceEngine(InferenceEngine): """ Pytorch based inferece engine for sharded models """ - def __init__(self, shard_downloader: HFShardDownloader): + def __init__(self, shard_downloader: ShardDownloader): self.shard = None self.shard_downloader = shard_downloader self.sharded_model = None diff --git a/exo/models.py b/exo/models.py index 7653ebdb3..9c9ea19cc 100644 --- a/exo/models.py +++ b/exo/models.py @@ -23,6 +23,7 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-1B-Instruct-8bit", "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct", + "TorchDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct" }, }, "llama-3.2-3b": { @@ -38,6 +39,7 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-3B-Instruct-8bit", "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", + "TorchDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", }, }, "llama-3.2-3b-bf16": { @@ -45,6 +47,7 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-3B-Instruct", "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", + "TorchDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", }, }, "llama-3.1-8b": { @@ -52,7 +55,7 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", - "TorchDynamicShardInferenceEngine": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", + "TorchDynamicShardInferenceEngine": "unsloth/Meta-Llama-3.1-8B-Instruct", }, }, "llama-3.1-70b": { @@ -60,6 +63,7 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "NousResearch/Meta-Llama-3.1-70B-Instruct", + "TorchDynamicShardInferenceEngine": "unsloth/Meta-Llama-3.1-70B-Instruct", }, }, "llama-3.1-70b-bf16": { @@ -67,6 +71,7 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-70B-Instruct-bf16-CORRECTED", "TinygradDynamicShardInferenceEngine": "NousResearch/Meta-Llama-3.1-70B-Instruct", + "TorchDynamicShardInferenceEngine": "unsloth/Meta-Llama-3.1-70B-Instruct", }, }, "llama-3-8b": { @@ -74,6 +79,7 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3-8B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", + "TorchDynamicShardInferenceEngine": "TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", }, }, "llama-3-70b": { @@ -81,11 +87,15 @@ "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3-70B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", + "TorchDynamicShardInferenceEngine": "TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", }, }, "llama-3.1-405b": { "layers": 126, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-405B-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-405B-4bit", + "TorchDynamicShardInferenceEngine": "unsloth/Meta-Llama-3.1-405B-Instruct-bnb-4bit", + }, }, "llama-3.1-405b-8bit": { "layers": 126, From 85d25c101d3d239bf5e6f6bd91a084280c874780 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 30 Jan 2025 23:15:25 -0900 Subject: [PATCH 586/589] adding torch support for qwen models --- 
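Note: with these entries in place, the torch engine should resolve each qwen model to its
upstream instruct repo. A minimal sketch of the expected lookup, assuming exo/models.py
exposes the table edited below as model_cards behind a get_repo helper (both names are
assumptions, not part of this diff):

    from exo.models import get_repo  # assumed helper over the repo table edited below

    # Expected to return "Qwen/Qwen2.5-7B-Instruct" once this patch is applied.
    repo = get_repo("qwen-2.5-7b", "TorchDynamicShardInferenceEngine")
    print(repo)
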
exo/inference/torch/models/general_mha.py | 2 - exo/models.py | 65 ++++++++++++++++++----- 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/exo/inference/torch/models/general_mha.py b/exo/inference/torch/models/general_mha.py index f3589c345..8a9be51a1 100644 --- a/exo/inference/torch/models/general_mha.py +++ b/exo/inference/torch/models/general_mha.py @@ -2,8 +2,6 @@ GeneralMHA class Return transformer model with MHA """ -import re - from typing import Optional, Tuple import torch diff --git a/exo/models.py b/exo/models.py index 9c9ea19cc..6b69cdab2 100644 --- a/exo/models.py +++ b/exo/models.py @@ -162,55 +162,94 @@ }, "qwen-2.5-1.5b": { "layers": 28, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-1.5B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-1.5B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-1.5B-Instruct" + }, }, "qwen-2.5-coder-1.5b": { "layers": 28, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Coder-1.5B-Instruct" + }, }, "qwen-2.5-3b": { "layers": 36, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-3B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-3B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-3B-Instruct" + }, }, "qwen-2.5-coder-3b": { "layers": 36, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-3B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-3B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Coder-3B-Instruct" + }, }, "qwen-2.5-7b": { "layers": 28, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-7B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-7B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-7B-Instruct" + }, }, "qwen-2.5-coder-7b": { "layers": 28, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-7B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-7B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Coder-7B-Instruct" + }, }, "qwen-2.5-math-7b": { "layers": 28, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-7B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-7B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Math-7B-Instruct" + }, }, "qwen-2.5-14b": { "layers": 48, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-14B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-14B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-14B-Instruct" + }, }, "qwen-2.5-coder-14b": { "layers": 48, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-14B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-14B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Coder-14B-Instruct" + }, }, "qwen-2.5-32b": { "layers": 64, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-32B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-32B-Instruct-4bit", + 
"TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-32B-Instruct" + }, }, "qwen-2.5-coder-32b": { "layers": 64, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Coder-32B-Instruct" + }, }, "qwen-2.5-72b": { "layers": 80, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-72B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-72B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-72B-Instruct" + }, }, "qwen-2.5-math-72b": { "layers": 80, - "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-72B-Instruct-4bit",}, + "repo": { + "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-72B-Instruct-4bit", + "TorchDynamicShardInferenceEngine": "Qwen/Qwen2.5-Math-72B-Instruct" + }, }, ### nemotron "nemotron-70b": { From 57b43f76261f04b1c594961e28798e6366c19556 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 30 Jan 2025 23:28:35 -0900 Subject: [PATCH 587/589] updating torch to 2.6.0 latest stable --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d97a5a089..3b0268a9a 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ "uuid==1.30", "uvloop==0.21.0", "tinygrad @ git+https://github.com/tinygrad/tinygrad.git@ec120ce6b9ce8e4ff4b5692566a683ef240e8bc8", - "torch==2.5.1", + "torch==2.6.0", "accelerate==0.34.2", "torchtune==0.5.0", "torchao==0.8.0", From 611bffb31f26832b8d2a304c62e4188314c2e203 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 31 Jan 2025 21:45:28 -0900 Subject: [PATCH 588/589] test for mistral support --- .../torch/tests/test_mistral_full.py | 236 ++++++++++++++++++ exo/inference/torch/tests/test_qwen_full.py | 14 +- 2 files changed, 238 insertions(+), 12 deletions(-) create mode 100644 exo/inference/torch/tests/test_mistral_full.py diff --git a/exo/inference/torch/tests/test_mistral_full.py b/exo/inference/torch/tests/test_mistral_full.py new file mode 100644 index 000000000..54c0c8dee --- /dev/null +++ b/exo/inference/torch/tests/test_mistral_full.py @@ -0,0 +1,236 @@ +""" +Test of pytorch based mistral models +full layer run +""" + +from pathlib import Path +import torch +from huggingface_hub import snapshot_download + +import torchtune.generation as ttg + +from transformers import AutoTokenizer + +from exo.inference.torch.models.general_mha import ShardedGeneralModel +from exo.inference.shard import Shard + +from exo.inference.torch.models.llm_utils import ( + load_model_config, + load_model_weights_torchtune +) + +MODEL_NAME = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit" +TEMP = 0.85 +TOP_K = 35 +MAX_NEW_TOKENS = 200 +RAND_SEED = 42 + + +def main( + model, + prompt: str, + device: torch.device = torch.device("cpu"), + dtype: torch.dtype = torch.bfloat16 +): + messages = [{ + "role": "assistant", + "content": "", + }, { + "role": "user", + "content": prompt, + }] + + text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + tok_out = tokenizer([text], return_tensors="pt") + print(f"tok_out: {tok_out}") + tokens = tok_out.input_ids.to(device=device, dtype=torch.int) + + rng = torch.Generator(device=device) + rng.manual_seed(RAND_SEED) + + generated_tokens = tokens.clone() + + print(f"tokens: {tokens}") + + bsz, tokens_length = tokens.size() + + # using self.max_seq_len will take up alot of VRAM + 
total_response_length = tokens_length + MAX_NEW_TOKENS + + # setup cache + if not model.model.caches_are_enabled(): + with device: + model.model.setup_caches( + bsz, + dtype, + decoder_max_seq_len=total_response_length + ) + + if not model.model.caches_are_enabled(): + max_seq_len = total_response_length + else: + max_seq_len = model.model.decoder_max_cache_seq_len + + # masking for proper attention + + # select correct pad_id + if hasattr(tokenizer, "pad_id"): + pad_id = tokenizer.pad_id + elif hasattr(tokenizer, "pad_token_id"): + print(f"pad_token_id: {tokenizer.pad_token_id}") + if tokenizer.pad_token_id is not None: + pad_id = tokenizer.pad_token_id + else: + pad_id = 0 + else: + pad_id = 0 + + print(f"pad_id: {pad_id}") + + padding_masks = tokens != pad_id + if not padding_masks.all(): + padding_masks = torch.nn.functional.pad( + padding_masks, + (0, MAX_NEW_TOKENS), + value=True, + ) + + mask = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=max_seq_len) + + input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) + else: + mask = torch.tril(torch.ones( + total_response_length, + max_seq_len, + dtype=torch.bool, + device=device, + )).unsqueeze(0) + + input_pos = torch.arange(0, total_response_length, device=device).unsqueeze(0) + + print(f"init mask: {mask}") + print(f"init input_pos: {input_pos}") + + curr_pos = 0 + + _, logits = model.generate( + tokens=tokens, + mask=mask, + input_pos=input_pos, + curr_pos=curr_pos + ) + + curr_pos = tokens_length + + q = torch.empty(( + logits.size(0), + model.model.tok_embeddings.num_embeddings + ), device=logits.device).exponential_(1, generator=rng) + + tokens = ttg.sample( + logits=logits[:, -1].clone(), + temperature=TEMP, + top_k=TOP_K, + q=q + ) + + print(f"tokens: {tokens}") + + for i in range(MAX_NEW_TOKENS - 1): + print(f"gen #{i+1}") + + if tokens.item() == tokenizer.eos_token_id: + print("stop token hit!") + break + + tokens = tokens.view(1, -1).to(device=device) if tokens.ndim == 1 else tokens + + _, logits = model.generate( + tokens=tokens, + input_pos=input_pos, + mask=mask, + curr_pos=curr_pos + ) + + curr_pos += 1 + + q = torch.empty( + ( + logits.size(0), + model.model.tok_embeddings.num_embeddings + ), device=logits.device).exponential_(1, generator=rng) + + tokens = ttg.sample( + logits=logits[:, -1].clone(), + temperature=TEMP, + top_k=TOP_K, + q=q, + ) + + print(f"tokens: {tokens}") + + generated_tokens = torch.cat([generated_tokens, tokens], dim=-1) + print(f"generated_tokens: {generated_tokens}") + + if not model.model.caches_are_enabled(): + tokens = generated_tokens.clone() + + print(f"\n\n[resp from model]\n\n{tokenizer.decode(generated_tokens.tolist()[0])}\n\n\n") + + +if __name__ == "__main__": + # prompt = "Hello, how are you?" + prompt = "Tell me a joke." + # prompt = "What is the meaning of exo?" + # prompt = "Tell me a short 4 line haiku" + # prompt = "In a single word only, what is the last name of the current president of the USA?" 
+ + # Get the path to the model files from the Hugging Face cache + cache_dir = Path(snapshot_download(MODEL_NAME)) + print(f"Cache directory: {cache_dir}") + + # Load model configuration + config = load_model_config(cache_dir/"config.json") + + print(f"current config\n{config}") + + # Setup shard + n_layers = int(config["num_layers"]) + shard_1 = Shard( + model_id=MODEL_NAME, + start_layer=0, + end_layer=n_layers - 1, + n_layers=n_layers, + ) + + # Initialize tokenizer + tokenizer = AutoTokenizer.from_pretrained(cache_dir) + + # Initialize LlamaModel with config and tokenizer +# device = torch.device("cuda") + dtype = torch.bfloat16 + device = torch.device("cpu") + + shard_model_1 = ShardedGeneralModel( + config=config, + shard=shard_1, + device=device, + dtype=config["torch_dtype"], + use_cache=True, + max_generated_tokens=MAX_NEW_TOKENS, + ) + + print(f"\nshard_model_1: {shard_model_1}") + + # load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) + load_model_weights_torchtune( + cache_dir=cache_dir, + shard=shard_1, + model=shard_model_1, + num_heads=config["num_heads"], + num_kv_heads=config["num_kv_heads"], + dim=config["embed_dim"], + head_dim=config["head_dim"] + ) + + main(shard_model_1, prompt, device, config["torch_dtype"]) diff --git a/exo/inference/torch/tests/test_qwen_full.py b/exo/inference/torch/tests/test_qwen_full.py index 93b320718..2e4f55d12 100644 --- a/exo/inference/torch/tests/test_qwen_full.py +++ b/exo/inference/torch/tests/test_qwen_full.py @@ -1,5 +1,5 @@ """ -Test of pytorch based llama3 models +Test of pytorch based qwen2 models full layer run """ @@ -8,12 +8,9 @@ from huggingface_hub import snapshot_download import torchtune.generation as ttg -from torchtune.models import llama3 -from torchtune.data import Message from transformers import AutoTokenizer -from exo.inference.torch.models.qwen2 import ShardedQwenModel from exo.inference.torch.models.general_mha import ShardedGeneralModel from exo.inference.shard import Shard @@ -213,14 +210,7 @@ def main( # device = torch.device("cuda") dtype = torch.bfloat16 device = torch.device("cpu") - # shard_model_1 = ShardedQwenModel( - # config=config, - # shard=shard_1, - # device=device, - # dtype=config["torch_dtype"], - # use_cache=True, - # max_generated_tokens=MAX_NEW_TOKENS, - # ) + shard_model_1 = ShardedGeneralModel( config=config, shard=shard_1, From 0523893ea32e02e7eafc4b87bdb7b5ee45963544 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Fri, 7 Feb 2025 17:26:16 +0100 Subject: [PATCH 589/589] --help list --interence-engine=torch --- exo/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/main.py b/exo/main.py index deee2fae5..38094366e 100644 --- a/exo/main.py +++ b/exo/main.py @@ -81,7 +81,7 @@ def configure_uvloop(): parser.add_argument("--chatgpt-api-port", type=int, default=52415, help="ChatGPT API port") parser.add_argument("--chatgpt-api-response-timeout", type=int, default=900, help="ChatGPT API response timeout in seconds") parser.add_argument("--max-generate-tokens", type=int, default=10000, help="Max tokens to generate in each request") -parser.add_argument("--inference-engine", type=str, default=None, help="Inference engine to use (mlx, tinygrad, or dummy)") +parser.add_argument("--inference-engine", type=str, default=None, help="Inference engine to use (torch, mlx, tinygrad, or dummy)") parser.add_argument("--disable-tui", action=argparse.BooleanOptionalAction, help="Disable TUI") parser.add_argument("--run-model", type=str, help="Specify a model to run directly") 
parser.add_argument("--prompt", type=str, help="Prompt for the model when using --run-model", default="Who are you?")

zecPTYu}mmz@c9}$osw2Ed)JKMDO5&fno9JoY@(aN@uBar6DjB3$)GgX5PQ>%$A%ws zak<65cfTIGZ|Ot%#N&QcgV%!T!N$(4!|<)x^*T)bI*zd9)_JBN4vCqgv3~;q1H=JlX6*TESbfSy68PmE5YP_woe1!n~ryRcuFj@}_hs zGN+7y6=}v$EfJv-l!BZQ8vG9MhG8OC4YfTVNq)i02N`4G*ZJszd$g7 z+eec`0)2wK&=WaqsBV-McBge5cC?8WfOkZ;2axsHZClHKOGn6OzicfK;b|r6fxiOL ze7!Kc*yq1{gy4$MvsUayPUa5+bMSC_q$5*1^P^s^FZ7Duur^t9feSsi2vrkNc!^EI zsSK4A6Dxoqd68VwNcdE7!sND=D_ce2J=LJ{vXGJ46T$ZHwtTLo3n>Ei0kO zqe*yB$^zyA;uJgwq`j3Q+J_*>bA|il!=p*VE;i?5sL?8|QizG=hkxRE03acOISeh& zTSFo~z)q7H^7z8232bbBIXqx&)W`BIN;b?((rN1%52l74juhn2w{CBF6x*598 zjr%EVe`(@VVefD#bfc*WI~yXiGVgZMHvPf%`al>&=IrCJTuba;uJgK zlXWv~{yTMt;0FhvM>a#*6zS`lV$Z{Rk5hf^r`5OhwJ*33RWU5RT(rw?8#D-Rn`-nA zv&J%iRu8wV=z3m7q+)Qp?T4%i_s!1>KR9YT5&}yBn)7RHB0jkQr{NKHkiU;-N#kca3%OQm_&EL$;mG3jU^v~EFoB2m zy-RF9@e1W`RKjb`8xmt(>69lIE1K*SJ5)lj&)rHXzO*(~aA;`9c*7J6*sI_bfE;AF z({S9#o{<^Bkl8!@W42lU@+{sjIK*m$Q0Y>UnT)aaJdjm3iKIx#+uq2un6Ib^$!Kalap~(i4QLC`PS#BzlNHG&$6+Pnjlkj z|M~Hn7VbO#^uv$vX~zP_i<1Fg#7v);ga`w`6bKgt2%r`0alj%3quXf*_q4P1S~9GO z`vjaoI0DVGV5eoc}TTkythX1zn zJsW1hp8xk$RKZmna!N%YuC696lUs1}?PNnJv;}sSahsh;F%OOfuItKaqO z{d$j==Ok0ceF#h6K4IP-3iMv6dt^v}7PG0yP3q6Yl66z~#%ZqZZHAYST6fr^YUdX8 ze6e{*>gp4j-H%hO%!ogwVx$jE;ej{zNCTUNRxWZ5Z64dJ2s!mvH_XU$J)0?lxpeuu zXONM0rmciWT;^(Lb&-9#Q#Pg(AfgDoYLbeZh}lC41I!|!_)gbrQt-hy zr<8@0;Wh{0k7fJP?N%TgMsIEnb5q{S9U=EqNtM^-4CYg8!b0FBsJhK#SSAV2!mo39 zq_CumeVjzq?P6X{m8Um)3vdLMPbRgJtJ#l%7|TC{7d5Vl*)r~{$7_CpXzx8?E%D2) zTq2Hi9Ev)M?jx}sgfFWeZKtMr(9(~SDD-YL4P|Za9xAw0m}tB>?=PuJbUgH>%P}C2 zy*5yn?l2a#*XO5&Q*q6@&aCK=YTi6S;Y8oPt$MzYtKdmeou@OZ5@e0+*o&mcOCH>X zWAWpg)BLf_?VV1R*`?!WpK2KR09<&d;f;`}0gZ4;@Kd0Bb9HrTmT^G}lAH7N&6ix! z`G<#&dk-lgc}M!%)_BS!&unY^;k$ZVz*2)iDHh?2O3+0so*oI7^fS{&jQW7$(b14J z;DvlsAfE<Z7cvsX($CnMeZ0z$UxQac$Mt#J(HHU+= z4+rQ>-7ji@ZxFjy{xsA`P$5@-^${}8na+^+UxTBu zOTo==^P)RjYdwm~{VZO;aW?Fagx&T7S?aaOX59~T0Z8GZ=DEO3hc!7An!pLX=1!G6 zSjOjeX$>0p*z_#k>8qlQmU#Eg$u>?WypBvUj`aLcQ-6QHKaOgb2&k>b7I9<5e@h!a zbMd#Ck<=FI=^9ZQ=iz+5{V7{xqTma1^Mc0<|Kv{(!!!o-zpHXJ-OvuW7P@sIyiJY$ z{Va$fM-P`=GTL56Thf5tsC6epZ7`1wbVx4m5=8*xO?Y z?+@zqx$PKqAHH5Mo?-A&%{SOxGRv z4>WQL*9B|rX4d=D_;^T&X&YowvuU{iwprWoWdNhbDNjXKJtga6=EB#Ow$*6XU^ZM? zS5WhTfH)T}X3T7rU(NhqQE{2IdGA|`jp=^IK`7_t1Ej*P>5v8p4@>GV4hKrw9RSO4 z!|LMr1K^qGFg^f|oLK098VVZ60~LI?Z%IK;CFkE8@xCQM&TdqSO%L5u=R+PP1XBvA zCW+-$u1P4qN-xXl1$@$kuuRp~Fw)Ga*YEz&p$$3O06Petz$*{|M8B1#Rjc2e+X{)5 zXa{8m9iz7@^EcIN}H3uyb!LIUOQOeOm=WVS~?M+GVltJdFz z>UoK$3m0V&-JgWQ&P^-Wz#4|ZdUrrCVTb9$#M|wXU=1J)dHyR7!5A{q6aoIDw}hp4 zXXe9$-`X}D{9>ZJz(%fMEnPb&@o`bz)YR1DbP_i+x%Anqr5X<|y9^u+yH0g2Q zzFI^sB_=^qnlEnFM+;f^?@|YnKx38z34KOtfq_JcPOkyO@90#z&8C%ucL)YX{?flglE;}Td zktwWorOvPQY?=DfJ+v)DWU&)DyH%r(reKUZw*&jDS5J3j49{u5o)14iq54wjQQD%N zz?$o0&serDda9qm5P_TfQ2tB1*sZbEhw`f z3u~;& z6uKNuW53x1!`_`#L>N#S1Vc@?zuZYUVjzFsvSiqMUl8Te5^qrLd0S*;K#m+O>^P(# zl17nAx*Vr7!+$C(lE#0d?eDOkux69�nS3h>q)H`)sC+>CT=m(VLCDzlPa-h$t^! 
zTNaUNQ90YytoLQzo^@Fp9?E ziLNb9B#tgf489Ug8N&rK@ZA@oSV0y8z5oze-|$wYG$C75EI2C& zz6h-?$qkMxA|ZkpTwUuq1jY5H>hi@1!3|MtF<;>lpy5Gd*nIwL|o z$TN*Rt+aLvN&YawpkA3eKAA5z6iN=847x4pixT5m{-jS;3iGo%sn~_*v=bG|&)aam zbJ8Zu1uhBuJnn>k>%Yn=~4<|f&)SB_>+JJ9_0LyjH-ZAt` z2{EHibWYyt39(eW?%xkEUIhNI0Wed@TEZRU1wR0Efy`H;hi5*+63_24272K|8l z4NnRPMPe4h36~b~m;i|oYY~z(VCH2OC>376b?)w&>?vhA^NrYFF&Z(@kR^PZM6g!$ z)d$ft<~TYX3#3>D92kin$6K8!dLoaSRE+m+$@r$5#~D)(FNyy0N4yu({KydtYexD_ zp*<6(xcE$?N_r|5cmdw-Xx{;^lghSgivQTJ35o4frY z{H&p8-Rb247U_L-mUD&HH9k7TnSVMTf)#K_S_ktZfQAHS}aU@_SL>~qNJ-0 z*b=K~ZlX6XO)V`% zbrPcegA#1d*X$8R&2Iz)+K8;QT0_^Cmrn^;-B2G>s60D;WzWNj2B_?;Z=ysiIrbzc z!R5sK;oP-M`gN_>*@avTFV}F;kEWEMo}d-61zNIny!-aD`qux}pP2LV0-F-nda1H& zZn58&A!bW>EQw*#`A0#8Jj#36v}M7fj|BiEuT=?X<>9i#Lcb`gyTQ;49BnUvv_*n% ze47OI_*t9wepOeQT`jjRQ}gg@v{)qNOqVw1EV-tU(?`_L!TZ>c?5OU8+3)a8){CnL z-C9vGnY4^9o_?~HU^MAFZ^g%#nanr-*=t9R7balj)2YZ&sTqOQNCw(Qr{T@@jFbDO zt_1ry`KtM^y|I#Hj~Vj^${7R{PXRKXk^T%=cOA8?uY? zYu2)`!KVtB+wt+`f z5e+qdDo&zehb;Oh(C)@9Wl##U%c8E&wrtk+E8W4 z+xT@&$|CVDfA3{7b8{EPpP?5!Gc8A_R&RhZwL{jOywrHTHan0BU2$ zmV3YNs6OPq&e(c01THf(HC{DGDFRSBI62~5kzC-^0FDza@cMX|WX5zSv*;R%<6|be z7uc}`ftAa?*sMi%>r<-x1)PpA1j%eI`yKI7Oiav4>Q%xQ(VAB*e3X@%pio@F#7mKbR()y+=F2I^~bqttLW;Zio-8d+!Qp2DUUeHqeHXj zE>lie*NqEVj@LZklq^00Yj3G9c$zUpFK?wyq<;KL@i{z?_PFAwhCO0V-^pg8yv!m< zMbn&9Guzz7Mz^Yx^U3n}(k?Eq{NCm<9quLHt&8ZRDFC@IW7#6#e;6If4*4+knEcCH z4%Y3=@dz*F@vs9sv5?^DQ#NO3mvMZ6SPRrTz+Kf>v^1-m<~1EcR`mvH5S`&&*zY^P zlWC#~)BQJxMs-eJB0w77#d#m@hpzCOQ{3TAk1?$e9_S>5YL2!?m{;4awa_M`LEL5G z)2*=}bRIGue;2pfmc%79QD>hrTbJD3Odl=6GHXq(oKXf^-bTf1?%8Fqpm~9zC_)*p z$uV?I1~Vf0cZ)FdvZauZ<#=(S#Jl$%9X?HZiV?C%K6R}&kFh?(ds+{_UA!o4pBQW& zGot2x`P@6TzaqN|ZA*|e2{`9Dfr49+-hud6yjxLk z>bneOn!O);3`M%c)QxL?i>OKDFo*ZTt9BiPO*fcfm5;0Z$B9iKeDQ+Om`y? zoChs0`>5aT-K2ZzdPq~n8g)P)X9-ZB2Fh#b(bz(AWbnFV2~RdQnC~{ewaDnsZuYa~ zpCNvkzaBlgPDwa%Yza&~$ieQjM^#v_VYxqyzbt2&k$~QuoUh1s3Es>Vc)j7WPONY< zRiTm)mF!r!VF{3xPCS`_EXg*nF+^|XbXqOdg4>t!=N?xD$O)R*%y%+NF5=?0Qf{yJ z6HvEl)UDeHJe&@7?rk-e&v0=c_cFeUP^K*N9{GNJ$?gBfWb{H3)qclkT-D{tB=mLZ z6s*D`CgwQ*=T3Kf9>NSc%Mm{%8G%1Rcqf8UisV|Bp4Ic{L23DhADHD&&kPq983{~6ToS=8&0DTySx+FpwAKr#3q{h}B z27U}to$qH)T+Bp@NVv0|HbGw$J4|?MYIN8Z-t=(s5Wmm#Ij{WbzQtSbK5hRY)4;BM z^;p`qS$dsCb9BxL()tAa$ejNAEyJKXAH55RJb-mNj6MBlFFDhYS zr~gn>LG6=h@Zrn1be1p=C%t(J^x$!W>BoA#tCWLa?+lx|p^2^(4q=ZiXr#Nhjx|ci zQMm(x$X5>c1qWFdE8;A9q5;IZ-7@g-t)tm*^k4G(Q{qq1gBxQ+Ic^W<)fwTRAW*Zl zy_$o}Yb^mAXt2=ca5{$9MY}3_T}x2vLUdHA&1E<0G+6^>cjt^lmucn^X32K$&yc%^ zi>&xmRnI3$g&;XKp9@T|u=`n+_cB#K@Iy|#iGZWbG3oK0jvZ*48=L)lj)auo}~p!>t3F%=g%Ox0Oen=gD)Zh~_>)_CEm#uVy?!mEthKDF1wBWabCIY*LU z`m|MFG30Gk`#3y$=qASV_#muw&o-T+=~i_fT$CcPY4Z-IJ7FV(n6`j?cXRJz$)Fm< znrKx&kd&->=B0zI=U{O}VA;>-bjUa_v))W-`MCE;6Gu%@&`8<#{dJyK1DEOiFQSkL zt&#N64>kkw!iWCr!z}y=twpx?PdV0_k+G(>=*O%_4Mb5o(yo^UoFHawp*hq>vQ5<30F3e0ShGx(E7$26;g1<3c&9xdzB}xJ&-?HbsU3G1Lvh3;Q@o?Vd z(O=S3bWrH>iZOu)G#9#uBXqZVh_H(ZzI}=03EkW9xCtY`x9%r+@KowVdQ*EHdaPT< zm<$7g`1yX=MAq}{Zzv0jc*NWQv^0L<2lW@R&!3?Mutt$iTnWhqCwbOSHRx7ynk3;eq$ay?cm&Y=kZ%;F{ZhGYtwkQ+=OW2|EZ?NL?(%YxoS9x@Y{>* zdqwcTUTT%W)JR^g(;l;7uJprJQIfEO0kg+IUB14OMf==KS!Nk#<=T&Et`C>J9H#d& z-sc61vjSH+;m6~`XuaDYHKU(azk^AG&tARV==Uk6@Q{&1M^z6_c=_s_G zPQKR`+1}A(vwG4u0pnsXa#^>HBRv20f5y%h&L(>h0-$B@Og-RbHSM!Sb_>%9`BX18 zxlJ3;4`%9w0c?AMPN&p1M#a8#$w7L21j5Q(*;ldCQIokex!dZ6;*x#&MEkg286E*& z01?V9lkCj|keS&Tc)v#HMUXYKi_vlHrU?NFVnJCY>ooNZI_XvIJ-A}4P#`jUI*9L6 zwe@T&6#$VwA-Nl1?Y+Auu#%-OBOavNH!Yqyo^0}ohpgehO$isQI$NLhP&{MGu5%0Y zxfiGl9-gWuMkzlVP@w>uOqM@MqVJE5o)c5qGyM2Fq!q3BHJ~F}Q_YCc`gYBMX{|Z= ze!12%xg$EZ)cy*|>OLh|h#`KI`sUKdAig`?jKgO>O2w#hZ%pBr;s9hmvh1XB!-*x#n*@nKHhi`zo-r!NfbD6wm(i2N0s 
zb8eMe8A)yInR;~0bGMx=fl5~VpWtZ~uUy^KR?vzzo0j`wLit&nw6{cP2%X{bQ#e5fmn0CS828Z)=vjESF2o*HarRieo}724$u_5M5CXP`;NVJ${OCE7Lq@wW<4Sq)^`< zLHDXvS-sChU2I(M-BZ=|-h*)kV}@BNb!le=Lm+>-eYU-S-pD5`Y_hFHhX=8gi!DYO z6%8`7i>uhomA=fPWvrxQmDBfc)Gr8HVZoUew+ zRij`=ag$$f*0XJrD908?>7W?typ?Q_P>f4`8ztF4wR*OZ`jn?p$37aJq`iSpNC6*u zsfpBZ&;8r(M0h{D^Dj2$E!WUF)$c}9w%(`|GqUhxnz0%iA+CqNI+rc#KR5lHlZkT< zgw@{F$uO;Jyu0Be{A^kbC>B@svY(_vaBZ+w!%V&din}R!Z%wU> zJ+=2GWVF4*#d<*T(#nz>czRfj;v)J!3}qc9ZdN_z=jHTa03Zjl1-E!RjD@t5Ti&;1 zZ+lc({A_w^D%4DQWRwu}^Ya{-ExR?*vLp1H{ffX@Yz{)YN2Hf;U;u)or~v~m>gO!j;^Fk=@&b2wAB{)S)nprPcRi2>xV=?$e&v!^4A0u~tH4;Qxx|dvuV^~YY!;{jyF_A>!nX2JN{bM*AdMw-caibKY48kO(q-?8Y**u8-!y~&Wi&tL40%@?S7$(EBO zj3h#z^LW+mUDGUEd9|}pqrUfdk!~_9`)&i8e(7I$abWSP4PUCR&L2Nd_N$?fY_=BF zHCoV%v+wABVqK|$|FGk;A~C-4#r*ff;w&+@B_SDEvtf7mH)PDKgIR~Wi(S=&5g4A6 zuFqqY)Bcp|by!f4`ljvK#up$jFAoM5q3d-SlbX5=>vgPI`IXJ(XyNQ}IEn8Z29A*= z=vJ;;qElfyT)h8*2IlrOBmotCESVEOx9Q#=jvkf;SS3y$FG_krhcCSDo$i9`?`w{* zxAFjr2^;J^CZ5|-=;3vbkOtmFDp1KWMHu- zB~{aH{K<;;=xnHR8_D0zOv~s9I_UoWHW*mI_11AOg=Ufu`f&0qF)7x5^?O(_&!(_J zWYZda>=yIQGEH&1xKwwNKUlvRkc9(S2^1y%I=CiIHA*jsc^(-kx)-O@oz zMkKHqgCW|?%RQP?;{?T zpKNk_mo}>R@6ofrrFcG;?HnA=D-1PTs~8s6tar?#4UQ1HQWF#V?o|VW1oOPx&Akfp^Yhzt{|Iw%c2X!rbTv$> zeNtG`DmhQ(^vB5*_QM1M&^;|IBerzt)hc0;-Ed;X0|kYajcEek)x7N1O*8b!bbpH; z6pd$^3gzhcJbY|g1CxTOTo(RJn=;;y341#~Y%zuat>?>_%PD+UF+Y#J9wt1)CgkFO z-b@pCD4Z24urY0A{v-$UtR}elknHCE)I|5^qqFlkw=nE1CTO4Zkg)3Yk?|Qd9{3)Z zQ{vGnsXw}%bDY4#!2L$D_V1027e90oo80R%qhvuTDY^bCv1T3hTxIZeM4XNXG|XoC zO5v|Uq~}50OS@jTX_72=MGI%Cgah{7Nz-Pnv-*-W$q=vwy9Qzupu~)-0eN#DI2t9H zBlxuNizR${%qJQ9ic7YaF+Bg zBfcY3*i{;&I5-MT&9K@qdqZbON4!)H zOryrylx_12r-Gjvj+-4E9AIm_dv!Q3Bqa1Hl(5WfJjZ3pt-JZ#r1|4v&GKB81pzkp zo8Yy*$pRP?&E+c>JUwiKVPGZACZD$57Gc;Fo}T`xU5c->vVbQvBoF~43bB-Q&a=wq5i3=Ey8>R#n6m_s*P>Lfo1FRRKFQb)K`tR zfCaqOS1-51;?K?x#0yeA%mRZNo+upA3!bia)yj&(1J~B2|1*$!p)9iV>o$9kxWue-nqdUDX0h@;-GQ$N)(@AcSd^tnSe7N_+fk6M!1-~oL zZ@d^acT6 zQ%oKSO2(jElB&|y%wGX^exUxB!dtDyM$d)KsSJ*7l!T|}cfaPw-n(O(L(XHF|07Vb zZIm)u_DyPY`H#L3IP5+350gw`8i7MCnV{@qc$ihC%zuHK_D@9kV+TKh$0j8=cl+ja z9cF_oOa_U!sY5o$0(VunnN>$dnPG@5TwHjq-!O#-Rs*bJu#p;9n1TEYOLql+f3aJY5$>#Rzs0r8`SpOhJ#2y>J?W_i zn|4M6M6YLXfQ9bqU{Z-bmZukvGBL1LGlYYkIa=M=S_kC&W>6d3hp}63TJ+g>)+Ybo z*EAXX$_c}+s%U{ZcTEmaTFl64afRV|p@+S&?$4S)aaoE-dD zuLMs!aF3f{4ON41d2E(E+acucJLwMFLx;;P%gx5q#o8K0SDSIV7^tWu{Ej=f=i9PL zyx92oNNLIiDzK-aqM(>B);novXuuG0v`cuN*>be0KgsS!ugh5TU3S7Yj$53}Gs~c7 zl(90bw@nUrVXf}%@9!E6@Oqtv>L5T4aNgvbd6Byqhx-yelhN`1*Mt)QyLrA>mCsX> z75E+yz|S+7ETCF4J)FQn!^&z4du9xI#aeiG1i8(0@!}u(OqlAd@w&faGwhnNtS&ES zPnJ>PSy?S<2A7t*y@E~c|B?l;6qf@|o)onMcJUm&6R40nWW8Nx)VvBLl1)|ugTD|{QvLoL zQ}mzdin$>C9~)5Ip6e0*j}CoSq4m-m8ygm!%}%79r0vq{dDEwQwsjxbCmufatG{y35wQDOQ90h)djzm zcA&^V#B%`$QGppsUNMtd(*sy<&QYuu)s*Aix3qLbQ@91F==c^_@Nsa`^7lVK%Qbx) z!v789VMK&Y*290+5(lv^Ll8<*vgQ9rS;zx z`=9GIO2Yd$Kd3|00gC$CTA*!{NDL=;`T0TCvW=>5{`vX;x#ZI+*OX;`0*^~EsboGE zRdGwCcJQbR_P@d6sEme4T1AHBV7~cRP~T>HLe$_;fFizcfbx}&aIv4E509=3L43c& zfD_0N z#lle|rR2|PhCfP?=Bq0g)pLR*A*pZTEU%6HET2l)asSK3|1+0)u|NAYU&$Takjjn* zQli|ZB=7%_eCI?vvs3)cgrxnqZsNQSsVx6oDN1wcPIgP|>1K2wt_Cy3r?OC@NI~*! 
zPS&JS=jQagd?F2FkZILN82##l-WmJ5QAvd5q?E*oK{~VgdwZmY7@J+Y-?Zk)3jHz) ze{ptMvOMw;YfxOfYqpZZEucO;5w;3{-b+EACFOrczTx`CG?@y))^ko9@@J&RY0WyLozF(nC6+P5l_`eKmT3I7FTD}e{&oZP-ct42DLem=QVksGV^wrsa(v~VV zDXS^bt9vVSa_`bIh5A@kx}rFjNXI;nIM{ zefKZs^Y|;E?tLF$87`7T#uXHViAzys@lMkxvQu1EoOrl_;{DICW%+mPRKJ<&yCC?E z1b_cISmC0Nl?M++tUe9JTLKLdtx=+CCN)eCF<4|8RZL~7NoYEzJnfJ`XJ zXOKAaF7!Bjy+l&YtKcHx#x$K4Zbe2nHMcvwtI&+~ro@gB!Ff6Q?K!xek>UVE)`ook&1wwHMugTUz_BTsN( z_bZ5PVu|FG4*N(ffTyKlAm@8mh+fPx%48tvw%^E2arh!%S;o7(i2yLZdnHaleWzGa z`eywLV{=*shvtIk*_GH`>;iiFRb{TEYkd#%rBWHceo!9#(TGFSBIJ{fYT4 zONq0Q;oIxf&H2@?F13d4{R~EvzfS_bhNQL*@dnc&%MfNKDt0Uys1#ximETQB0j<@n zA&2KC8%5QBw*X3EVyFuE6Yhbk*56BW;^LEA{D2|5Xt{+wLpk zMPvob209?b8IKMaR0w<*C*R1pq<(rZB%DJ9qZ9E0O`gGU@WOym z;P0)B-2v-@-u(J_et8*iGG!^A+TT1hF(JasyB{(KGBmdL_K27S1zi@J{XmpwzT7m( zu(Wj8_3v@Dy`Hso{&lfF&;Cr!{~ss%ODJ5yn6Hsv>!YerG#gl>dA| z+V7&Zt<9j^sz31EyOH7Hf{DK#a}Dw9g9EAy`4<(o{sIodh4;5THuvYnbTuyKyT}wY zuv=KW^4x5*0s`ExzC+oQr5c><(mN4=k%(B47#mSZsz^Xv8Y!G?c0Zj~P~dl0e0(d6 z6eSj~*7H*}! zIqe;I5b0t9&%NT~<9Tpys(Xr3+r`BNT_x@ov)cQ=_75Jiimms-7CIoZ&LSaZS&M-n z^zUJESwz2DWE_`#qUI_fJ!V%0=g@kAlxPa8^EnC0kCss`|55L5+CM9^KqFAU-3^m) zB@Iq!@EAuG>sVV{LKow&mrg!31$kUG_Z`+I}scajP0*% zu44tF5hljHO-b~6)U4=x8N4KTMZgv;ecbJZhk0+68QTHXAlU|CC2S4KgQAk#wqv53 zgta17l9L)F=hjU!S0fhKK;E!+~ae) zciqVx`Depur|Qmd9COERz_fVEJ%-V zYr=m;#Mz&DJ#0I(u|2|MdU_(0VQ*n;@n*L$_1f3D{oCt3>~BrtwPPsk%f_Y^qy0Cb zYhk+_u=mUo$_1GcLQM%@bha4Brnfz3KYyP#@T(WZW$%-s_YjtnViOUO;apt_ey<Ic=IdA40GJ0` zodQJ)6j>RhQwj0|q+~dOPpk%sFaOw)lb19Ljjwj0Nv{{BE_rW)1ykf}0U6%gdJdg` zJqpmv&{0Y^3WEuu_E9#uCzuhG3dnWDCQ7j*4ugqm5CB{e$)=aN5key_@MZZ*s3^V= z6xg7LEggF0LYa|#C=5ER=fGwJJpC-yjuSB&U{Yw)5V80^^q#GX6VN*wSHVyS?p7&yorX;edMAur&#W#6d;4&FfgBYTif3Uv6`T%7(#vvjAu}&wkaImw$$3f_XgM)PgQ_T0SoCvEEIPxbZ z0DK|96v`WYOxt!vQbut``3VCkg4P3HQ95W`bV8PoEE6>jBn&K5h#6@lm#ZS8!gB>D zqCLQpfw=~x&uttm)L-rTpfEbVPO(+VN#J{ue8OUdEy8Wlli?RzJdSgan8ohlJAa4J z)iJ7Pm5@g0Q%J}K`W>;Pq-3|PShC``AMdaO4t`*h2De&NK&<8)&p~BAK|w*tr%Y`_ zc5yDQBEu$c`J6*A#_Q_p0-ij10;<_!K4xcU7X=k)`z&lh`Q$>)VvNU~-QD8gZ$2%PwDU0fJF(a1-(( zZ=g%?D!6t)nOkatk3WC@JY?RQEYAuG3i4YY9UYYeqft;nrDnx zMV5dvwkQPT;lqa@ex;tTzWO`f7K|-bRHB~V@9OOh3lArMRt{QjP-0Luf7F<7XSQy{ zq&!c(pg&cDGgdxTX?^LfPBk(;-N@Cok_m@2*bIzY!6@C?!()G<)CiRP1#b<+S+~au z+2VTy1q6r~rR{i&?HwN_DyF5S1rgDI=WS4A1+gIzcYW&VDGFo_X&*hA0Dn-woB5RI zGX&9^l614&xMJs_frxy<^A1DhgE0N@OClhuYqg3DQOBNJ*2|94t~?4g@+PKV5ToJe zuk!A+@KgFa#s1jgq@Q=57HU!1l;}=S7mx)s58yIw(T(HQ|U!JK{(6=7FO(ZSDUy8uR48m0$?43X z8=uN6?jL9_@B=GR-JJ0sBIsXFqk8oZ^O!Jz2q<{e>kEgtVt7Map!onIu)7L+#)~GD zgq-PHUOhzd7_$uC-N5w4l3caY#TW&PfhiG}NDhrUZ-O(rC_oKiR3PB!f#!t6b};df zN+>Hh@Em3hM^}Bc375yk)PzD%(oFO?kPB!b@{$gsjID@zj>4oOz=^la&@7`NkJQ(& zL=vHxQFIwBgXE$_J6I3kWCx%VN>JK5RjR1&hhzn8^tC3%)@b`1aH(+206l<~goq%| zwf6=UgnEj8W(+H%P5>Hl8#g8w#n3`!G*-BOp#D&AtyMa1OfwWs2H7X~?q?;9R&XLS zv3X&j7X?K-cBtmUzQSd4vv;?Tt7yUJf+DdYjK=nT{r>>GbBRTo+n|Cth=N4L#Ka^d zXgB-a-dvluhkfoc6PD{ng6gqT6_%w8KP)Qjr@lYSQ3V$&yO@|xv8LSf=MHoAAiK+b zce?6z*7K>UDez#X(ppVNC$aVQk5~6UyYA0Rv9Pd6Nf~@qiHW43qN;UX2IbHvzsd#; z4i2`qwt{sPoUhKKldo4^SL)Rl-&%DUMt3=644pYji%Co;nG)pQ#3~<8!j} znw<<3W(O6lLDlI$Cnr4Y>|g~QsAhWq^V!7(i2G<4Yd$8?*VnIhm@81t$0BCfNdZ9g zdTDvN!g_=i0EOIf2nk=kd7P;8h>NsF=5McO%qyK#Smg8-_P?FpCAhIP0#%*=!8CAF(fR? 
z;Y?(=hW@fca=vA1Nj5n1yN7G&PuZi9y+B`$dX`>`X!bmV?E6e8yGkgJKplcKnr8vy zlWO89re^ez?opzHtR@6rzJCxYlRTE=dHHd_S=!ss@_S-ygkzuA&q80`$TKg;@X6V7 z%Uj{%Y4;7$BH8qNO}~shcN=8mNmKUw$NmEZfH^xgXeArM7Toi~HUUIB0LUSyG^{cC zumQe}3lOhd!VxRsv8)65!fG{IhQKE22FSzjmYp~^1(hN?+ z@DRBY4j-(F>$=b&WDrt@?$u+|os8UN`!>rKf!D>5CFIAkzWx@AUh;uU=pf?@NIf7H zL7!Iiw6`q+GZC8ra)nGpJvOdXYqJHYBlc;nz*>GwCejyqf5cRwyQ$E^kEg&xl^bIx zLKQsV0BaQfKm`M^QVDKerImnzi-s7rk(B!IZ`4#Ts8c)+CPM8OnwuH}zo+UJGf23D z)1hd{s<*4olbJJQX^eUDZeUM$x1DfuXlQ79dU|1@Ex5-(I?cX=>Yt!%j!(%0s$WCc zm2)FPLIkJ?!0kXQ3`{$i3eiBwC@AV18ej)UN1>6C*clVs_;@KTvG|UT`$a`u(OqW5 z%mMz3n;qAS>z23)LV3{5!1_pS6z(D9@5-=ftb$!jNUu(?xfSbFIp=Hd* zjc1XLgd{2{$wp=cOvqwkW9MdPV_;ys|L`F=b7K)?_N!x{CMHfnwm>k8`@*1)m>0aj|CMlds_R46fq5!F;t`25{R10Y(r?9ouA)1-FtY z-DowF{`UlUZskwNhpv)9H_G(wS}=aBPowcME*}mBb4K2|iiW7~kjjm2wafL*Iva7= z&4WEp*C0)EIT`t)A1GJH&kxB0t-dE<0)fSfj_-5jvz~svJ|xwkd1TDRUAWEX2u;U& zg**#&ym}|p$=%BjD|7eGs93vd&o^z6asDlX|3z3UF5F`%_0ycweRo*1hFtiS&3T32 z$LIb5d`d-gE!yBl@XyO8c4R%D1BI26zx}CXvutEr5;ga`o)$o3cg!plkhXB530Giq z$oN80#|Op`X}88X!d8eO3Lo_vh_Lx#1i4**fk^}%=D4Hb1IPA-J(cjM$HeGhdWC?> zod_Z}BO1rLi~f3Cu~X$AffI3anHsDsB!Qf?A7NyTJ|_z1*`oW^P;f0|ogVrEaoZr- zSqUX^R_k%aaaQ95s{GLJhs+qgc<_a@ ze)4Vx7%hy9jDW5ecyN7vUGe>UiI_#*p~={4LtTCSyvG8#WnAv>%{73Z0vTI8oSc6h z9oA@T(ET|v+j|S9pO3dE7Mgrc!Nj4fIDJW}B-8KcO{!G{BH<%hKL<7MTKG z)yJ(@9(uTY0!)ffK=YobRSkRH-r479O(2Q`=)k^2yq+?DI?$=WiIp?GI~)+NFR!L* zbI6SKUsPr0qZ+1_AQR+?V-KCx`hW?S(lho(pfIcTyMbX&MjAqH-^}f$9_jQzL-Qf3_3TJK9 zCEsdHv2iQ6+%OK_J;3gkaW7gK!F6;{kBzKb@&a2BX{^*9^6Sxas@%vzX3PUdo%%Pc zq_~?OYiKUJ5>mXOI*`u5G<2J`a(f?-oq%jYJpd42N&R-ET^c`O-9QbY6tG+y61KmS zXf6@su_u_C2eUyS=>{SqQUj*wJ3C*12B|HSa@)23FsUzBIeic0*cciby1NSliHa7& z$)yb3Y;3t;mI8F0b4TiGYjFaj;^V0Sql%3#UEGa>m$y`}#zC*zE?=~9 zc6JtxMz^*;%~9Q4cA2{S3A93>FKZk5fRAsm`S;Hc_~A5<#Qm>o^?6E-TKr8+G7U?? z3i-EnSD@~BAk)+5&%0L-IXMiCcEOK=DL~7CG|-!NUT$9>NY`Q~!+Q6_N(e7drPNT$ zhh+`CSVl%hcJ_l<`PJRsUCF@fDCIQCO_0#Cy{!i}DMhulwf*(HIl$e%VoIGOHg`Rz zG;>;y5bK?;-|y`N2n2G1H;eZn6Fh~mu=`-fV~$unL{N~Ng~hj)mX)OLkJ;u;lkWfSSGmKTQ0CO8*Fw87|Wp;vEfrq;HZE#$Y_ z5UPSDece|3%~_^bKgA9cC>ib3#H-=Ug?s#N zE%DGvh%4#Xn$I&goggi)9<{2LAKRO6wz0OpnG$MRhMj-G-_+gzg!M|V!HhB?#{Z_7 z!{sJ;r{Uf$bsI#vNkp&Cd;!yX=FP8}nSL&#+8S{7%ekisDA8Tr+;^TUQM^~{b^Eo? z;BAl36h=#0#e|f_EQO2cZ+-qNJp%g4$7xmmF3{VUzmrDcUqC&(*)HoJKwYzgNZ{4G zPfRkrRr>d}3EI5Y`*YW6Y*s^~!0qCxNVuA&g)|8c0YITqvRzz6>eiH8P$-j<@2l=@ zED0Es3=*C}s0=r~O4D?muKWo)v1nkW0G_c72_YFYFiX_6?E#$i;96s5nKwO;rqKD< z=fc*T3QwQIJsR{IwCDkL6y_vi5kAr#?&6YF;0rnldcTAGA4Q&fFKfMB>9#yK`YLnq zhzUem?3HHR;3NPVhXjK9{5Dl5xLNBHrVe^XahJ8;AV&X$l$2R;5`vrX zU%mrq>Gr3qwn17S5uK##ilG3uY)J_?{IzGNyo9je>B?QcTdV6_E zfskD$yM>wAq~O=kkPz?FJ^L!%-IrYenEM4e8l)5y-zK(q^A}f>L>w8YsI01V!CCHC z{N>9R&|CHeX<*T8%JTuY*PyEm!al22c|CfOj4})i4D$^hj-hR;hK%*L6D9IFs$^}T zI|BY2SmOcu^58LeFg&bUtf_aCS;EWyH*Xc1pIvlSC3dYH*H&EKr9JWZv5kUZh1KiA z8cV;b$IMnE5!{RtQoCm47x-^WKlbSfbLO*AM)v@y>`$If7Z)Pi)))r5OACyOx>uH7 zhc%Y%1?i_PqNU0|N_qEO3a%czQoq!BV;HQOmA}5ta>tl|^R0kDv|uQ&UzVy#ucRAk zxNKnKyPw%{ss>6=w$2+HvNF%QR->1un+11bRm`mq%hTC`IRolrLL74)K3s zH4Q7U%PU>^j7v~Q-iS62wP8Mfy>!yIe&M$DTm4o}JyTj{b)^6|G_EzFkP+wa{Ljab zb+GV%=_Mwiw%L|Kb%)ekW&0bUe&2N1AN;>n3js}?7Ke8`)b_qyf}?W3UQf*2qy$LA z@W*Co7>~e5P9VfkW*u>gaG0o!nI|)Pm;GbYWkv%O=A9GMHC|BcerV3D! 
zodgfwK}=7m_NtW>O|Ce$SZuKnlFV z9V+x$$YbFsqDq)Q^f|N#gqaNdVGP*PnBO!_9*sj}kwYkA4Vhqi!~%pEZI5o~N#4=w zr$s@TXg*=8B1m#M8v~oQX0TnM^Dyar?|+Pjde5kh+lv=3jxRJH(x2q#rqO^t14w8I ze{KpAAA}v}?f9sk{ubt-8V1*?Y!K00nWx|w_j=N}MjM6$nC*S4q}2R;S#RR?EcMg; zpnB7Vi*Q+egB%kb!>%p^>hg!*~MGa?8gfASEprHHCJ|;RwVVV)A^y2$2b{yoe;K2kdplTelZ10gl znBlhakrk2#_W=!{6q=|K@||bEA4>(}%;3@C9n{^k--qG6kgpM5O{r_wP^aQQf1Qfh zBB(FoR|IK`)(T%3j0P4sG8VX^?y1gngkYHM-rV|GSYEpgj;p?~m%RR~tZVwIcUa3cEPyvFwqgp4uA0b2*M8%5&>gN&27X0THj z7ToaI-19C7!ejVjJy$omg}s5L4+J8Jl*qrAdYo|H9a}(IGf_=yw2kOK1~EcCg=qtq{O| zn{~~+Al#gbypOtT1z|_H!R`8>f4`PY34M^9b#7a6?(S9jJw6_!{5I_v(MW^vYMTid zQ~iC}*sP`JW}0a^RA#3MeH3b1vJQn3SjNi;J4|gRDQazqcrPm7ElnHJ)iygitTm}H zetBss-||e(A*zG>3H9%_IAx!@#6#ts%Qrh7a@|`t{QqQh_ulR;+D|`%FeSL1CwApe zwNVy6#s`dD3f|oNBn)t9VWR#a?7LpRt=pN~h70qNZUCJ~u+Opat*AP-FF_-wD=Y*N z3+K!jTYYzpC5b}-c6K5~&#pe~2Q6&^ekBGLOc@3Yb9@GhVe^1AhB!nM4KP8akXHqz ztpsRGG`}%(+2M1EePAAA9wH71FfnT5HKMHyu(&Y7B4Rc?GjO9k$Cw;p3((hat&HB! z06jhd%mQMM7)RXcgb4$d5U{r>ADYFOF9UlVT8uz+7lWbVLKG<#MgqbYO^lXBXJl<+ zf>)Bx5R1aTVss)p12%B4oU$s667yW)(FN&)A*RiXjn_zXxDom>3Tp+}<{X-6c#HId zHUg4FRLEVWXAQQV5ga5qfE8|~>-0v-heOE&>OnjeKnFE}n(aQG!*#_@f^ndrX0T*> z%7552L-wp7>5H4YxFidtdfcy1oM+T-B33T!;yk@P#r8L9Y7W$Hyqt?`D4i#gUE1j< z9%zp3_=g$fswJF<^7D<-TpLO6bLI?Z4Sc@I=nNJc4@{=dUGEDS@c!XXUDj@= zJ@CM=g67#u4;ml~|79F0{ny;vR87|trSFM>c=p&{4`k4wTj&=+V8Bvm-T0^eryg~7Ut6woPu$IuwfwBy(5IiGyq>v+9SE)FruC1~ z*q7Yg3lE$H!9v$u6J^7)`7IU@yvy`af3%nNz>VG|{94B6l%C<)*fr!zVkY`kX*RYh zB1#4N^_}|X2r~R<7#Ns8N_e$@gb{1_$|y zsf#rRkv0&$?o}b^vM+VIcP20;<?@SB0Vy#dv z?qx6l>$$a|cX`r2#q7iHE~gQ1{8J9MNtv~y%W==;{^`&}Ojnu%tF@hh;71X$BX1C# z2c`-^PYWOaw<*v6J~@N@^DLx!t0a%!^@eA=icUb z^wgp9Io6we|45(G9B#0|m2EZw298RafA0DJyk%qcP|oO#iXVl?Q+$n252F8r`PQ%R z4uOi=ZsMx=<+&Gr*cXnvx=q(J6KY$kNX8-#LxxzeZilW=0|bL zNDFJ&%gTm!N{BYfX4DiWU}q`i#KeKM+^aod#6sSGAL2^Exl%P)cV%O;`&aBQl$lB# zCAlRzXP72-Cs|Q?MXp48In`(^%lSlTVI?>HqrZ3gf_YC5`o|5$aNw|cK9$MLgwbE) zO7(w6Ae%p|B(ql4KHtlFuuQ~5XQ$D(sj1F8M}EMAq8e^T(cF;yPKQIS4Ba3hk&qKB zY%YWEd*AznkfMMCZv?+Gtn)`#n;@6AQ37wF4N~my_q<{hl=ZOx6=!oY?4KcGc#=YU z>vP9=`_{`!WBh3zECi&IDBkrs!w@ATa0o_6Lb#$_a}8(_CMX;#{hzql4m9rq78Lr8UmO-Z)f#WwLd7@C~!suJk3buxc#{ps#UtS znr-%gB}J_I)};CSvv2VKIrHI*ynMiqCTi4fo4^yo2Jusnv&5%AAZEgv2AGjnXlthN zwYA3xKZW;Acc`=nO5UOPGU!AJ6Mz7w?1xf}uoM7DWl+c5TSqFvg`)^WVf4sT0JE%U z4N(QQbfRF#zJT1qR!~~iRhUr(G1^VNeoWE#|JPH2ZFzo${91JpE$Xr=P-_n=rwWf- zbMw;Db`pj15Yu66X{A*^={fM~PwV$BR>yBfm%CZay46&2_kGUG&HLhIAIBr@SMxk3E2c<|c;&Upn(dAKO|t=h+Z^iP_2IBxIcbVK zMKSM!vb8&>6eY?lsAlQa-yP8*g}+Qvrz`Dg*VgwQAYQ9!I7wZ<{ko)y!rD($e(DpO zCUNig4%yQFD(an%p6~9}=95?WMgJ_De@3+P6-aD9Dz0V$!BP*D5jTA*HvwugX+Z=s z05t;q^u!B3`e91qn=L%7IA<|tccS(ru$m9Qj<)fC>8k=J7Lbv}{c!^b4kDDF}PM1SC|LcKD zT9Vzp)t}2h)DzhnD%YpW&16R|KR<6iXahDKIo)H%Zyoc47v4->Jv<5Us}$*&d|#6M z=>xm#4`fd2NdVtv@*gbT2Yrh0fq3b3u_6~lg^G{;F*fg`w^uD+f)^I^_X8MHkH=FS zBj-9iR*il=tW=wD0>WPF=%mDIedDWfeM<4zsW+-*PsFg!m6pz(d6>cUozBaxq2+VJ zn^U{dtsl<6574pliZ%P@_YOm7$aXsly9ZuqB~Y;~M3wnVd9p2yaM-YEN?^&IqB#UA zz+M;474eZOotfXWRp{xqcKbNF8ixe8*Vg+)05tpXI4PLUH?A$q4fA~AbWo8Ja31ehT8kj3zDe2`y&5&}?^ z0Dc+Dtq=xkxBz0t&}`H_RLhA|1RN^^(rjgtq6Tun<|KB#NsW=j01Mwv^_~6azY5EV zjFHda92wX#DBNVZfGqctD<1^KL7DDzUq0+*wWEX7@Tx-bFp)4cp?Zy914 zeY(+kEfpVY(f%i!n}S`|EUP(RGQ&pkUy=h_hU(uIYkg`8_C8~F6vw6ys%vWgfEwS! 
zvte8;7-&pz5*HMyU>8~Kp2BYnQ%)XtsLJi|Fni0J@~W4cCpKn%orT4$XY+09{}@v4 zFw1S^HDVtk$HWi?`n8k*mL~+@gPf7@0ha&+AD(d$9+e!_4`Iecfd&i^n+T}`GAF_p zP!mMCW^maEKy=_y!Rku?36h4!UvCQO_4^LF$zd7f*S=GGaWoS zup7Y+!NP!B_bC{maEgG4Q7)}}gIH8}4<3zIPjeX)0T~2mcg|Ky;sgJEtYKG4h5=)% z=eSRvBn5X=oHOJ9-K&N=v?~ip+da;VBP8`$@SMwFsCDU!%@2ps3M~>-o%~!+o9f+F z(o*ANiteXjpKHyTH@yx0L(`|Opo)Ls~Z7hL3KmK~qo;_~cU<1@8& zYRz0QN1igre2G3yp4HYcs+`39;NTox zz{!rBT{zx6c1tvc_CvVq7XddbTMx_XvcU3?$A({w9P$kk5=}poX<1F5VR;_lDJU9? zIY_5LvmTVrLIH}`2(T}7e?3bTh?|5+B`Sa^9(UfD0{ zA@Iw}znQ(2K?8ivtPpQ$CJbpzX-vQuyU`*R@Vi5Wy;JJ7Nvw~BHQFc%`~kk{u)saA z3zCx8byI)PS^cVIgaZCB-~m*@_Vnlx8vhDp#7@1%p~6#y`Ik<8$A1BVBC}o*9QpvL zGKt-{5Db(nh!5fa!MK3xA)5Pi32g~WpH~&4*a1`rTw~YHai$5|37O1jBLN*tkE(;1 zFhQeWfUmukEwluZkHEsgs)vO_c*@R7wEp7*4?hLdjieb}?;eoh>4{_f{Wc%-)q4;! zO3iBMr`B~@Qs4Bg3x8Kiy~><%aWTb@4!!NO4hlmJDH*Fj)De6f_kp}<1J%V>!$ z(P`f;)NDj0osNwo)wD~X`r#$Y?cjIWNH-albh@d~^odbf!NJAqPr8q$`#ZaPl=I?z zMW$Ns1f(_E4(WG4a80x<57ra!jpCdmG#+|rW)06S_TyT=EEi|*>`8t2(6p}~l6K$J z+}Z6$kjwqVonYq|BO{H&T;PYK0aHY>*lN-1!eLnCjs4#yeE&Kg=al7Q5OF9U#AviJ z46_7i0#ntxOmuk#UH(`HZ(eoH+x*sS@xbte-ywNjVGf#DjtO@f5`fNT?|lK|eGA85 zwS?PP?8sQSYhT<`w6ifn11f~bvnchIIztGS5mOmdep5p(s&Lj5%1~cmKJ=VQ8&>37jc|&g#$@ zluhP|pBaAr_ufzGdqgE7y&mSx(aYqC(Cps#^(J=NBDc@=;$@yuH)5k`7c}YqlF01y z-skXmbxKPi75OLWCUrW@NZ_>VW-$#a>$SB>pMG5?(dC(yTDBW=%qbH_pOkWoaKn{k?QzRO>#U+wu6Pz9GgNQ^1(w{fX~QfLhQ~?0PZ0UOb&Q z-NCUg6Z(VWl(g%0E$(vrDL2zt4>K6c&bA+3zti=q7sq+k9hhQZp0-U`;=l~_9#SwF!)^>8CeDkSyy@tTfMEt&S{-8pv&@;LE$pNSqe z($nG!=Ugr$sY<^2)%fGi{5%5@G@++7ZQ**?BYAoB5$9b2|HUxpU-$j!sO_IEmUtK% z(K=#0ZEe5TAXk~$vu5`@8dH>51*LJ(t3GE(O_l=H$ggj~#P z&Qo!(2Eu6s`q$gf9EN14)o<5^ue;!yNnVFe)$QYxRO6Ykv1FiU(X0H&ad)4y+4~hH z5GE|c!mrRZzSWVP?-H0aC9$gOVvb2(uOj2fsH9UK9a%c59jBpF`1NW!Sm5!qCGW(;wf0_&W$>%MqSTNC0+5Knh;U&-O z#`ak=^4qFeA&h{4vBNI4M&x}+7~HqdC%fXSJ$)#EOXA8K_dh$v$;pYYbW2Va)2+X+ z^uqjnGk6FtPzH>{>N%7n0@$EGplT}u0%uV?T@%bc2Kf5Eo5ksmsJE_fLu zt6M7VwpNDx3_Wkx6>L5lRrG958OhN9k~rQ>2xh+MHT0YE81EMG4z;1dQ)CIcar}Ye zH+|k*&i}}>zn=JGjg6=-KG*vB^p&rl#+#Mo{ed0Lrch*w)yRtTugrGB(4?^h879AC#nMFHl6keyA}^Xt0h%;!YA!uG4=1J*|U z``q<)0t#$3UAm9l);98fkF5K8uy^6Skl@w5nOE73ORRYAn*67K;MrW9@4=HAES`X5 zej#A-J!5>+YvMJW45UYom4eTAbVzUgP;kmdgShI+{t|n>$@c>pWx|Nq3W<6!Qz- zmnJ3%1Bp745iboN#=k5I7_FYm=q`VO*G=eK3V4(hmc}z%5|HWi4#%5qH;tEqq@8Yf59?A=82IV`Zb`t+*p= znqt*NmyOeksrDoT7uIuwvzu7I&1b9?PbeCE&AD=C5XiB*YJ!-l4&hwNm%T}WKV=o> z=Uz{G{Tz+LYT=Kn@q6>!aq~u*@;chkOz0OEr;70LCez0dfe&f?+pnf(DE42vL^3#a z+@X!2voJ8v4*`EWjQ0r5=RH3-d~9^>T?x-w@8NMYIF~a#yX&Vn*SROu-IJp$Wvbe# z%yYRMXpn0-GFGjmGoUPlGvt{G{dHAPiM4~`&#rnJSh}^n>yxU{3UN+34Nr5Y`WNxxHDMqGB zh8`DFYpkkfrJ3fjOVsVelFPL+Tgoh2xW|ATzrDG-fW8Xeta=OzeR=gqtA&|y?T2mm z3#WPh13_sV#+AG&{v4;VA?hBbMHa8AY0YG&L4HmiyBZrS@QyIQRZ@$Qvampi>r9%V zL|-$1SO6%H5B=l2fX>i8rRBSiJ+rSI8@slBK5zIKDSZ-m_UQ2{4dc6K|;rz{0?lPwFGg-~->)&$(<(qpA z_4N2d3j0l(C(dRugaaarACF~f{2y;R9w`JQ8=WX=GfCZ96{c@jeyAs6Q<5>BZ2o5U z_VnD=eE+@I{0R9-@P*&;_NMGzy_!GSxc7+ZlW+OefFE!jKHJlbsyi;kfKbn5X3EM$N($Mly<6tynOV@@b z6vsj#aFIf{62r;vZyp-gFLf;X6!bK+rc4kZ77iMjU)N7H38G5r@;hHhd745%|Ugn zLGhd@^K}VUHOg-}9O=OXi@H2QBo3E2MlY|XW_^yND(i|=R=0NB%W4AT9eq}~p1%Hi zZ}#LU{`PpOFWmjxFoD)R`7Jt|PL61aGp{2^ld@T@-~48e#(1d3N>CM{)1U1!|DFWB zcX))V9v?A%f*x$F5po}I((J`}T`0c3+nfqH`}ULc;r&^?a`sY>K@%5Q1AWod-gBd; zR37_NfilV|Ud~Hp?u9(r<(ZF%=(8!CIX$yEu9H#&ne}E0%>}~et=2+PXC+(pU#%WP zHTr^E1x!2`iSPYdl)%;b=zKtQ!e16v@;zJCt>xBrvCeyvI!EA<=^an6PvHXjKi3K* zjc2pG|9pBKN2$>ni!~W6b$vC9@$qLEsTAPEp{g3vlsQAVx0|A-o?nHtOP|u@D_LnB zv@CDub6Km=#`19xQKnK-$JKB(9?(Xh{sB~LkrH>^WajDRjiar5BBv!Xpy}S$VOrX? 
ze-!u^++BHWx+{5obv;K<|6V`zWo}HonL)gj!!G$H7Uo5R1Wp}m>URzi-j*k)8@_)y zw^~-h8SGUf{hD)B%=y-G%eb#Rqi$K_oZoucJ*HH;N;GA2?c4gnv{Sy4KmR>TP$WP3 zfO5poghSDLd7I9%#Pgt%?OWwjfPr3oAW~&-F41jhcI9YlY3!m8vggG%u*c;SBkkyD z+C`-@f|{=I25YV1DNs)WRgtR;%zJDZ@E~0%Zhc zHS&lOmzP3}R)l<2Bl&j`mdJAiwf4m9lpSmqiGfb)TP}bA%{6Xr)Krin?6jI`QCd;F zmC;6DQn28Hpw0t#Fc(RMrZ(mVP>dS{R0zNDM|5B`mR9IBg>?WU(-f>Sf0V>VH;GXT zb(}~c9ydYGqtsA%Xi`2}9ITQtf@!Cja}=>A?*!P3e$1zG^xsG%YHjIl9q0#f(_{W! zJz)8dZGX2?Coav@N6U`}+Lsa*GA_qDZCVO;%2}1|{5d<`UVlFN(iXOE0VpW5)B2YY z{X#jSd8sBMy0nb6FuWONOXI=TCW+nXckMIoqy9OjiWf;g?;W?t{29r3c1cF)<{B@n z6~Ja{x75(J_2wgD2Ax5;^u1=7_>Ao|C9&Me^Uu20mn_rF%3#LaN8AZbL z6lHE0UR!Z%SADJOfP+V(-HpdmwQpjaoo{(peesQbldL$Vlkp%2SuC{RPX-M&xlh91#y zq;zi{$AoE8cxbc>@VrTi6U%R$aZixQLj<>@g*`cEYL?^~y=tkYI> z?E@KpP7UJ5-|TNpx6)?0?ECz)ZZ51}K5H_f#Zyo%u}<5c{N;FLWL~r1Jtnn~uZMGZ zL>qzpz+Pla+0ocC(o6TXSwHjl$%zX*u6fTH^Mib89udRZpO8yEyRMT?>Wweo@!m6g zdr&ZlIv&Lr{`?XnIk$3>I;-N%Ig#kt3)aw-mAq@abkDcn$o7&wQV<9YkCqj$U$k@R_%wA5N zFCQ;&?Hw0+U77=wF6ZWtHu2uT6KZ&*vYZA^ng#-{#xDnG{m-}6(?Xo_H<#GOK6B@v z>Wx_taBHSd`diJ{xZLdPv~~E9h9-(T6LHyE5K>4AnuA5=7(AnIsV?V(XVBD8* zG@|SRI8|cDHA=~hSK}LG8BDm}T|W(vIC)E+v~u1SU!8h&6hEAl6v?%0BC~PnY|3!z zf@HPr8H#1h(VE}V9P_@|c&R2)N$6%w7Sg+nOS_=Vr7|~b`$kggW78u~6q6Az@mHP} zf$Os=`h{$=e22)#TM~ZlKij^38PjXN$yCvE%;Q>K`Mq^s*PQIHQ=8q-C^wwPE8}|< zJnPu7_ewa?3t8OWC{h`qY(&WxAt|`NB7%K$A!m;TP~2abC{>K40E|^WpXuzwHmgG5 z$?xfZe)Bu@RD3U}_1fRN?cUsO}Z>UA)R&q>|8>bOuNbLdAb8z-18n~Gw+T{ zVpR*j@_Q0jiC^zt5p)IYr#5C~UNozQPWsHpGz&`jU{94Fe$Tx2iR6yZnrmE**(X2w z{U>dCreQK)fp}jb&t-xU-$6YxEwsIIf1BBa;Comrr$Ru(TXmP*4#7EF-xhbHSE1CT zOG{n+UFKg_I4R?b241f5Ss0=V7~8{ZY5?QKi}QLd8EnAJ!C=g>cQ_hz^x-vLztk(6Qty;%1 z&oEu=reU?zTlB44LHEz1_oE#6R2$v?es8{d4~4CR6{Q>Oe4O0^9=A!KRSL|`)n>b?6o?=*8qmq*i6K1|0o|PC_c1vPKq+fk!J{?zEcO!xs z`7H+p|Lk3Mmf4FEyrE}{NNXPN6ZLiePJmF`tac2jM+0vr%6r&?Z*Kf&=1D2S>iEX0 zcPyLolp{SRh=`*oJM5*rntra(kdGhb=e;M)Nfdd@BeuJ)OxWKqe(~$kz+iP6Y=VE3%vH=GL$((jk0B_c_^;!|`m*2jU8Bl*`;=qQ&aa z^+UZF=e<*6g$+4pqmFV%WV0EIQXNI!{d?zQ0VzT-0@qcEH3#wDiD!*xS2(xzY`DuS zH&-8XFAq7Ciq3PRes5fNtEEN2i`kM#YnYTUTNuy8)386^uH4eL&NbPNKw_y0Vh*op z1BAM#8;`dXDkNQQ#hX^Y1a7ZV%r=;I;0!H}jElhZeu~$uZt_|fjZIuLKbvf-vYtvC z{d_lzqGgl(@-&~3MQWaE$n9AMy`o`z13%tHEZ-3Q|L4JvEm0#FcN+s`2h2z4@@g;L@u|>-MWc1LuT)I*BD3 z_QtIP~pY^laH}l`>W(^orf;cOvHXI`Ze|q~JZ> ziAC9m{ft&2SMIsOVOyA!+wt^P3a%TWTlx|i8`n)T#@UA2naJ1BFVnL9qFd)6z3EDF zJ+PbnBL+X^I;Fk?>h_Hl<1o^vM?W?A{%=kV)aR|wNpCKaErR6XuRJ`MVdqYR*G!0? zeR1Eh3Tfw|>uYz?)=QoG45M7oQ_P#|Q)%*Xy(Ek^Iu{BD?q~|>rXY#kOQbK02DI&-|*Nuv8MfO5jL5o%rDKBZ|$gtzO=PGTZ*NtMhQv0%_UElS}9! 
za|#cI<<|MRuYhAK=G|R&Ii|OEQz>7-?aBQM962Y7j4XQ+{`=xi?eV0CTNAsI$?wCJ z(F}DcD9DK5eCm*vu@SkvI0p5guU~HyMyD02vbJwe3Kz&QxWcDh-=c#;mWF;`c=1@i zkH4c~?jZtg-Wno&$Mpq1b0cYN3VPYwxU?Vh!l%f&P!j#|op<{9CUD@IPfM69-%^dW zxngkYxgt7Mae8;p$Y<^4aKQg-9$%Gcag8K@#<5Knq^8xySKFXJmb9o~>!M zXgAH~T6TNuctItHJaYZT9y-rsT??+>(!4|HuF*mq$;4XpLF4q&q`ZKNS1Qe2R;v9` zuuk;lflj`~+rZ&&`}=ZX8k^rgQ8rzBuS>-vD(E}<&5Z3-jS0j_@}cd${g^xT zwb1C3^|&Z7eF4LV(%T!YNDj+{lVYO25C+CJ-xgNNl4(a659PY>GG*9Cpq{}jo9LqBW(wUIc+U%|~JeZK*Z5uVhBMp~8aKdZvU!i-Y=SPGJt5YltT+CS=t zXC)7IQ&}7~Z$02EBK zoow$(c(4MIbd8v6;}RJ1e|`!nF~QGYUOwD!LDp+s0Wo48k&!(T$e2zY(qLq4u|056 zjPj$Hp_`8C6}J88{pP%q_4uRpK$@tDTu)igQ@O!bl;_M5H0!|Pt{mdH$Z|3dT(z(O zP@7DU4eLK*!XCoIhMMc5z&)y(FXF0=$YoSnfg~m>K7Qb&k&ILwBsGPUGW(c*Xup^H z39lz*Ad90ig)F^Z@`nZ$nkhg_ccJg6oAE=crdYLlfJR9bPRr`FO9#L1#X7~(-FX{h zfNcC`@;ofSL_T8D55$Pa1+Dkgmgf$M8l#+Fl1%f+NPdQJB-yhJl^0O@LiQqlh<@xj zeO(k1CD(X&==O`2{2l>^M!t;&Ik(kYW#g0&yPA<*J4WKEP@g~??lBEwhlN{ZT~qX~ z>@T#T5Y9=MG9Q{rejip&K{9HV0s~QM^z@^2C3IaDrQVT8%1$n%ACA-FjgK&b?6{D& zU#z?t-#maiu6^V(F(%7zbW-G(2smG3kWcAh!m|zVEQuVN*usBHvd33*{+tV*U6G=a zHN`QX9t2>{Rhf<7K`PRKH>oLk=lodPr>x1@_x;7n(te)uq-z&}ftc?f|0+--4*{6T zR@$>@RmNZJ@-Pc*zHRPMX9F6g%^(Haxf29W{t zf?wnIiSTiGUP;W8#BmzRb%o2PUwp|iLXl!x4n^=V82)co3z#Mn+vx{B;k%J4GhtQq z3`tFeIA>3ohi-DDL-scxzACd%DWw#|+a(V|`uE@aU!e5j2RBjC%vvIP>aq%Tt z7H~f0Hw1SE*h`?}WD`VHN`MUcR1_vZ`D}CTN2k1Xvqcl$cY|l3lU*1^JC%}GL&F=MO|2sxG$ZYR=X(GUsK3F!_a2>VTw60I^wv@ZiWbbH{qTD+2$ zG{E=yB_5R4Slx4Oa|lIHF=^dWUZk!%wP`mu|Ij}GH6Ry&8hL~drxjabXppFd@yDh z1k@uNQ}Lzq@y)<)RCVAhB=@}qm;e8`d*n@!Y2)>fg#4|;oby!HX)5pRXzK#^rL!}Z zBLbc`pzY&hXrd;zHy2gz@>0;sywpzm@NoO@qRu*{5qlz&Z?7Z6N(@^PBPgOB6_mAz zb##S{X7R0J6jY`q6pvo{eyM7=XrKi-j7l9G4Ml9*rc{Z0rJj(l^B<2)Z*P8oE+u{0 zcqRX{&m9xke5?lfGM#^`dx7q9XUn==y&-7Y6nim3n8P(bx}kwlvL)$HsbD>b^Q`!% zVMN`T^%QnR5QtissW3SJvvyg9d>*oU`D6B!9e*jSfnwl#v$|4=JmluLjeVZoGqQSA zaqMjZ!CVJO>6FiPbO$}53cYH%fGG`)(A)DcZ(NhlG(+)h(p-18j)6Zl!G&5Q{cf|N ze(qbl!p!7g!OUnMXi$2T;Rl*CT-Uq1Z%O#`;&bBiFCo_cUR}LoA^R=TYCDfZuOn~Dw%zI7@?!8ivoAmAN9althRTjKdflanBU_=uC-3T$t}Pktdom(#9b#W{{KOb5)NzslR`*+#C18bUtI55kizoFS|KFG{2esjujK7bV!kQ*Qn*lS&aqF53;ay3a%EK3JHNFl!@59vA1Mkzy{IgD}GyhzO64l5r!N0O|a89v+-xQ zh$=P5t7)o8L%#q_-wj@mhVswzW=RYR+bc-M*7KZpO(f^Ev%~IsIlAeQeSIrP(m2fHM8C2YIxayMsHBTRw3SZsV{}>AbVOmEf5&f>RYj%6id3CU| zNFBqya#!47)?6&ed;Rsc|0X3v?y;Ny{KV<2e@FH958OUS(VUftkpRTqrz`rJH|w6W5TY`9|pQc~-pS`O`WN{&6GT*@pUHwJ<3w zrctp%8194@3?SVW=NWh^l7xJfC(S02dEy0*X zt0!*m8j*Jc)QlgQ5BywoOX6UgX;4qeL(WW93TP4u6S91i*P0bs4$Y`NmbI6uKi~Yd z|2&X~yYi?!lr%*(0n#IRL)Xv3dpPj+#f&Q&u&cWVEHyKyaj zu>Q=|Nk=p{UH+lunT(yH-xDlHDnh*VTOqaxm;BiIU1F0lx?k?~Evv+{UV2WN#dp5H zoWpCX8iM^6&4$|-M^_;smx)>3%T>Ne@i)Q9@NU1qIKvD zIz3;j%bYcp(mNdVBHQJf+&N=EN+H|DZ|s^m<=yyNM%=zvOsKxSLu&S6uVVZw)?|jw zC1`6mhX6vnknzMxHG0r=gG~re{MutOwDT)`M-Bd^*t8GwD6r}Y`wK0Z*adw_;ygJA zZ>`%I+#%=mmkj>5kk6=Q|0bw4iiH#6hS3E?PeV4hmdMX9P7 zKXr3=nbx}Btu{pM{p_(D}g96pzGFfF4z^OqqwxnfksenF2}TJltZ zhib;{a?Y;H=*|gu1ItA!pLzSKl{1wozIw-fiIuxV6$3w?jTBH%@;aH}ZR55u3`cKZ zbiDtD?1qJsBf#-KH&A-!I}|T>i0o+Ki}QJoXIK8^+2e3z+19cjsV3knk3wKt~fXZkrJn4^Kn1RSF|=X%Yk^gr6N) zD)ary`}E*xdXu6gzNeMkyymG+{00N-jBI{nt*vk4^d}iuPV`imumqOL&yt#jJ@jrj ztYUgPQwDzI`V_g$3|qD?dDD5DLKwEum#~K!z?YCR7x_M=?R<(*w0;6aZx#y z?|qgr@nnM|J4ng~ZChKKEIt;qQImB191~CZ1FN49jmkBjbvA)##81AvZO{p)?$=u! 
zP0RLodKtI<28d@T)+wcowJRCTE20Ei@vkT`(c_z=?$Jc@Vmdb9XTEedtE);G4Cu1c zhl!0Gby8g}2h(*{!y8KyF+?N_e5m5?O37KIMKMrjnzNmxVWE2rQ=^Lb+tMo7Y zsn&)4pQrzO0B-tY*2w1z4mS`W-Xfk=4uND$V*H(+5fS>77dyiPfE0#(Uve#$q*Jd* z_;6=8Yz!u5$+GUC_DzwwmxZxpbm>82Y8%z-yZ5<-jSB(j3`4B76HGJNks{!QTlxK+ zj=TWZD@C_m_}OTK+(liUOjISh#5RZXy~=gEVm*`SLoojqF3#^*!!}x$5^qb7%wYZ8 z=Bsx8_aHrXbB4kb7Hz&7K4z*L@^zQWWf!gH^_=&drPzGN3JQIL-}vuoM}?*x|FpVf z6ZikDdY9`I83B=xWHB72Whi;E9zWlYy0INnYMkyA2~F~OJe92wm$$hg-S5JYI-rHw ztbK*KNFwaC7v1!C7wIbNus`4#r^mi>V6!e)gJpkX)Pj!dNUCxn42BVYPwt~=&%zw! zUw?I)Ca)rwyabsW`*4jRdvaURQEVQ-<%M>TcziCO7KckATs?M#N)|Bu>6D62({Lzz zUQND6w9fgqRLlmOoHhH>2v__~e&4(gSNT!6Hm|DnOof}?bjVOds-x}sW+gUDLys+6 zL(g!u2O9^hr|bu~mu|CUBuBk>!lTa8d$I2rEYZxF{SbZEz{hrt;bte*b-^?hh6OV2 zV32RRy&CvfKAc}Diqw_tvrf&(a zf$I#pcKr;6bm8K$|NeAabm`}ahmykU<|1`8J^78$v5L_C9l|(R*mChqHa};wp+_GR zlEU+_*qHI=&PRrhV4CN3dtV&WQhpMcH7Ort#1YzwqCdYIZuBr4{ zY@8>u4>dI}xE&9trn#8>XUizc57EaD(h?qfqfwBi?OQ!FRd2d}r^8Fz}^Ky-sS9Y)j!Sy2{B zbZZ)3e(&F(b79Mp<#Sl|F<(#>^fqLH)rO|=t~fpHj&=6CU#KC3L3%$gB)q+vz9OM4 z+*fPn;h(!(={3|Tef$hN;TTh;P9#Y=8EB%1v}V0wr1RN>PRHa>``^Mx9^U+$SNjC0 zG2#HI3LVYLnW8WBn(k=p!W|Kq;s-xvV*<~KZOtIbquR=sW5{KmQQN2 zD(1)>=baQw$1q{rx8*&p`Ldslpdz>n=S?i7LXoahYhRb_5I^hgV9cdZMKcpMkEx^9 zk$q359O&H{f*Y6d4mfe%(>~bQsXk^0vQYJ{iFx-0X|{DlIVYyZaoK;oO|-+~{-{le z4)9r4{I_K|+s^&&&9U1X#N{yj{L7<%il6ysh4FC&vGbIe z)0y#G+Kh1p{fwZpp7`%hxDMDS6{WPJ0zoZ271Hq+%+W|D)WZ&g6b{#3*H@i}4a}1- zli1))xVed|R=;vhwS|+3qS*}>GO$DhM-5wEUH+Ix@!+1KQ!}OK_ugK9&kxe%7reOG zrTNaX>)>YZxznWXCfQfoEni3sx|zxum}PIeiS~N!GkxE30{t855Afm`jQY7thz61) z2Un2d=ABS+kGNR;?)UQMo~I@r->w&9PUchi#c<&XXDabWRD1yAC|;a;<>H-t7@FB` zrns%Bu>66tY+gb&@LV%kgChkg{ZrG zpwLM4(%uMF8w|%?S$(2<&Gg}lw&`a+^Qb`9SlE^JV!y^?(6aX3dQuM92~+?-6?Ac3 zF`RO4y3D&9!*mx9;ITa`*u!pss;QXyIbM~p(72RX>mas{I6X#faHo&US1M(%NlrQS_ zTCJh1dOsBUOQ7(B*>9Ar&+DYZkez5u-dJvQPGCje+tambcWuLG=F(>~zswipS1#&) z(&Utcv(%1nx7pXbfbQPJ)@+)#y~aN{cg^TTV)s@(ew1{=#*-!^)63w1VO2mg8Y*)wNr zT+ms*Hyfe`b>i#O9+OX(PGVj(Ot>uBC=o?4n)y76kP;$Uay5sY>T7E7g+3rhF<+#p zH~ozo?b{<)=>Lioe1aLwpeHhmJWjFyJL1_oav3T0-%<2J`}$Uk8rj9X5EY)yAl4)- zg5~$BTQ8(pHsUz;zrNP6iv;`ixUS4!a&RD!yEAknLVkOc1stFG=x_WAA5G3;xF95c zv`E>i;HBYlGR7c_<@j@-gPUxf1SKuu$dYB}Le-#>7(^;oP~uF(Nb}0%o~H4Zsl2c` zKDcf6;^-+7h;-1qBEMy9d3fu%Zp_~WV`81ndpx@$Icw?G5DS~^oNK#+my$lKrJ=ZsVy~_k-zS^IAc{gRn1jFr^=+ z^Ew?%3>B4>&SG`2sanfCj`yX;jQoI2oVo@ty)*-pM4|_{K5v(s+-hORr}nUS}|gq9jx36BGhYLQk$^`^5UT z>3#8t6sjoQdFNGIdic}%oP^4!zn4g7Tr$Tvv%EBGxLYS^pqb#y6$zLQB$fhUxAOCB zR+TBHmgh7>iS~pDR0Lk$JI6z=%kCkIqO+iZMy1er9@4rRw$If5#5Ll55A(LcY=B;O6xpcq`XsYHquef-!<7L$uoNRNf84`8#H_d{VUH9xvTJw z-rT5a<8P4oWXw~{r>pUbNio05Gq_lZLo))$;9xeTXjMvXL!Eb4leKSsFO&K)>kC^t zb<}0jg<=G$BUXFwy30_nUYnC*gy*JF&Tqq;$+*27+{=AhNpDZD%V@>@_F?>25jz~B zg^F*}WP|es1k`AbqTcpvLB(|2^Vcz{x&!JI=<0eZoeq({yaZvhU7D>G20rwnK6O^T zTUn(Oyg}{Q3*izFA&*aSxzr+$oN{k1?1}G6mQhgoWqOKabDfkoug1Ib(es&a*>)Q* zZ1WNBfs>PAz@s6)txYkK?|Tt4D4=913*Txz2P0IhqCGO-{_EfJMw|NJJd;Op;l<*( z+e;d-dk6j+U-T9$#U&B=-q8$n+c0s+pU}IaN%tXtaRrkiZsSI70pfLuMC@BNblCt& zn=sS}#Cryz#r*p?LB9$TF`ErsM)NI$4BI zjg$SaIrN{bYpo5D2ksgZ;vzIao;mJ8xejqi^#)YcRm@|$)||3@ZW zvu3wQ&g9o`!7Uv@(Y)2-ok0s<7Z^Jx(zHH$@s z`)-c8+YJm2Lckg%G)Kfu;MNKh3a@&n-NHA?Gn)EP<_04&usC+7S;{ShL z_#IorsbMl1M*>AUw`nptIS&TBqbhH7mR z9IAm9SgH9{PZG{r+oH$>t;G6@)=o`IiZ^e4ay+-_z&;7Z+QROL{LtJ3*lAF=6o);2 zII{{tF74+pw9ol!PMx3jM%!ffIm(W5;}|@nz=|oVxQ>>c$N&zBfQ5}`3x&husJe}W zg6ci=!7>JNVH!q}u_+2k{Su*!Igw0meNW94D~)|)zfs`^IQ`;P+1`AGWUq88#jpZb zth857F0JfTDmGmjFa>M*gxPvhg2FOIIm9HwtppU4W{J2sJ>9OfvD<(DIE@}9fBXCF zNghvBY#3)?vW)qRQmlAfbcUEJ14B4N?^I9UNu{*+M5vyZGF~ z75?kg|0JQKz*ac!(4zH;_S_4bej9TXzi$vj=#$Q4$n!BU`SYnvaD?yGooBkafly($ 
zVdd%@xmjZYR?>lfwx8AxDvcI(4v6tIo1UV*{@91sZBx+Z$*)5Ftt&}RO#eIi5wmep zL*`|N4WFmUHe&^0ft*Qp{FKrPf%R~I<9V`T-?n7WVg+2I)57!dSIwqd zxx&a5CMb#HnThOmsZzO?VCRU+`J=WVR1v1;B>fUUDRSGymwR=_XIbOv8%`LTH9<%rY%!C6O8LaUk8UyS=+!y_x-<5&ytM<-UE38y?c zD<3Sbd68Mhdar5R(`wAAsE7J@z?-~~x;Xta$g@Z487}hvi-LX&B{ogfj^#d=LOu%u z7ekii@t*e9ZHkI0iP)IECb!h;p?`lc%AUcpQr1%iR0c3U1Q=AqodnCBQAA z$@T_DYZVF~k{5J(YaCd13|E>;1qNyb5w*CK7CZ86}fxX1!n(VP;hyZDTC?*HFuG&S3 zAkEG)pXJvEpUQ{Ln=6gfoQ^NbUyhbY1@oWNyyK=LL@L7Lub)j4jnz!27c|(No$X$b zaemNIl*2Q@o?<`HG0bzv#8_>AP<+Y3EN)U#-M%xE7~nBWd{hqdt|8_;E2f~9O|?|g#b!$u5aJ^# zv1)LnOTegh5s{!sVWg(4c4Vt2-mq^S7ELn6i768PSoJ~_kP zoym8&1U+0Z?WZ-~FMm|QIVkp=b$YTlU+AzUXKmtSmTPCuh`dzaY%oHqlv0b>USm)# zwbb{d>sGs%zc{}R+m}UJvWY7eo*a^$9$)(t%0J0|M*HvbDE4mvzYCBsH5;7w0Vrrv zJeG#${B*>gO0_ zT_$Qs$AGOImJ3tt+BeQ+w%db9Wg3NqAfzF>51Yv^shh{R26whL*7a<9RS9y$QiRSR zuk|d`Ua=4=mC!QnUezNsOE-`r>mnYE?$2tZ(AC56s>$PBox&yz-(BOM`>C*zr-dv z?(`2yyNGWg4?Eo~S55`iIN4a!DbgQU-7L*3ACt=P4JF)ct`9OX%3>%T`rp^w$KT~M zVnKQmAs%kmZFFA9T5ppJmPa-?v!=vpdBse^7Zr<* zX+*h9nLlvX=|(mb=X_#-`QYQ#=wEb!J29gD9_E_9K&ozLleA{+r`3bRG}KM{d#%92 zxE|ST!)5_YR9G+XcJkiL*3C#Vidm?ejOH9^gKKm=)0arPO7+G@r8KUp-!b$Jss4Va z(OlOtxtLrUDfEeH9pt;FURQlmv&n!F^E%0dT)rzcEl#OpvR+|rr|8>PaqsN7THdR` zD(5(Sac|FS4Qt6LX+r4Rb>Yt$NLgMnZZs-SdSuWe)NV3l&yH`NNZO}# z)SSXos8EmNhvJ6xnB<@rWoF`A`npc0asqMRRBhi-vQ=Lb10L+Oh+d!T*$;j>Shgis^?-S;V;dawQ5EhE@VA!V}-{YoDFP0ylJ~!a* zt`8{xgz~O9&{V+Z`~Awq$Ij>ob#n$&Z50N5vIYw&X9By|)spIUx5ITqu4gDWUq& z=!@mAtsRo51`P2jC?~sMlv;^w z@aGNM+1jc>thH}0_5t=M1s$e* zw-cPA#9Q_LH_g(vr8}4n%1IP)&vK&;jg6eIoBC=i! zO()sk?P50)lD6rz1L z59FTpQ$0rwZ1fdt5Zx_9p$o?{Z~ zct*alwSd23+0>(V+lOqRa5ytlG+#B(3%CycjNtZ+oVbMd&{tkhW}1_&J#E~Mz+GUm zzIME-yD@($c~n^Fy}xP_4DQ?jGi!!tsVjy#**+i&h1HT{uLg1R>*+}*z9jFf?2}R4 zvl=<8eIM+fGu=kVB(QPOVOxh|BC2&-ySf<$VRY=}RgN`~6*j$zlcK6Ke@FFR5#FE< z16$T;prZ33#W!asp##m9c*L_UWc=(MEzzB8JT};-GM4O?VLb{A_L8n(gb$a`p_4Tr z-DXBNZ1u|xw`#fZbFt@w8z@u0?a87Rte(NSf!=qemjcD6xdLtmV>T*yfN;S54`1B( zdxV85_c~VY<_Sj2{-_~Mu-BFz%t_ODpzH=TD*^ISVd@^P2ZO`3aG>qHti2{jUi#02u%1-G&wa~@`h~-JK6dY`8pn^he@m= zsY$x*-=T@&nO2cp1Pi~3rb-dG8zY<=9d}AI4Y!%FwmfXMrpnK@@;GzPK_^0-kL#{x z>w#>a)?#n3;nb)lCf!~J3qo}p?I6!4DZW;bByH-=-!5wBMr?myoxj??lg3R6W4II; z61w-8(1vj9kaK+LFcHFf@I1W#{KK0Xh!aFSkFAm*Rht#jtE<@sQgX|^6kQ9Ar6VH4 zMTY4NcYH{HW^f&>{{@Kk&P_mwet(~gfWXAc3P4ENeAn+Y2s-!l@&YJNhkJXsd({9U z3h?pkIWj&+GawY$+gE&ijaUh!P>U0E-kUyJY5&7RFE{Mo9g>ui`Vka#ba4UD7$8|$ zYkr(xz~_SDn9}}{qa$G;{68Hv=7dBijkRNMkx?Jp{P=d^2>xg+7yN{G2 zEO|H#^|m}9)_LVvwH-}2%+2K$RuYto{;Kjin@oEn3BgzBq%vRcW?yvOCl_$ozx*OL zy8as5HF12-=ic!;mVzJs7ddwE+mDKy+=dzr+qyJ&q)c1KuOjBEPt4L5C>M(4$)b@& z?3{CDIzc}gEy>ncJx6&Kqw?WHohTx!Q7q0l4>#w2I#8{Uf_gFaxn8_)3Tod$ zpqp)R`d3cPTI)f4)P0uM|RK| zK~~-cjRiys#q8W~B-+^`#SdocZLT6(wTq^xw|G@2%mgkSQN>1O!U!U1=4TMCc^83P zGO4=+#~<`lRLT^N@po_!vsKj#U~=`(Q@=!Gz(dr??7VeCPL*TWKeKD{70)YfY#?h} zu1@X*|Ng$+b%Mejr>X&U)*NDiDBB0i9&~w?sv^JwSTPBO8r$GTpM!@~LBDQe7>>-t zT}$+vIUU{mJzi9arA6nepVzX{E-9MjaIM&^W{Re4F{b;Pm3(qCqKYFCDk?0H^$m*y zL{&G~@w}j}5Io6@GO+I!vK7Pig^qc%E0vi0;){0hW>;NU*5IGzM02g=p4L{4&%YAi zD*LTHaV3Q*duH@(zf0UY*`WC2CqYR6^ONi-KpmPT{#u0a#qn`4z?3^`$2DeG&moLcZt!`yhu&XHXM6_L z+Hd>-PTnZ(dm}dDf!Prd=m$6x@-OyhlulaTwX*_CbpwNgIM~=^@600W>JX46?*_M1 zlkl~!P~x?||p^zpFn za58D6Vc;S3Kv8s)N*R$qF6W3Z5=OuqYxC5N6GW6z5y}(k_Q$8J(#`lZ#ju7uQ7dP; zj1HL8qu)qDKPv2FCu>w*FYhd?n>%wAkruo4EVqatUddBCN2`H$Gt2N)37B>!G_Qv0 zL=}dm@3n4mr!2r)Vim+tM(P#&I z`+I1!X>EazS??76`wagsJiZJDP$ZR=a{vUjy83**H=2-9?Px@TVkboC;r0}Wju?-= zE;e}t`up!KLDTeq0U;E6F$;#?o`m>eBH5C5uN?zy!g}X@3w?b%09KYan!(Ofdv3Jp74isWxUi8Pk9tSGQlGrezO~y$x&tJfddfw6 z!(z--4wdQSEj_F{Ay8FD=lKA!QswUCwNOa7;0uzw{(y<<$G+WNq{;l2;2*==`s0n} 
z_hSy=aZB@zN#w@FcN$Od#@8_ZwvR+^MfZGcc}!(poUmH8?k`%LA6-}&7!kp|Dfi=N zp~t?gWO`$z#}ye!0RQ`-8lFb9B3kE^u$C7YDDrkd9+v3N2;AP|d^!wYV4nSJBBP%$ zFfjqo$%6v4itUQ3K2SIre~sl#1ptP$nl7VcWZWB*7v3MvXa|sr=jAe;fgvWos;&|ore7IdW9%OlQsZFMy{Ik`_t>~&OeadBAK z0DxJ-$LPEPP^vx;mp&XO{U89dH(sd9;wREl!72Up6JVLZgW0|%B($kZbZ`@Ex`9WS zt%eK8&@wYk{U+Dy>n&l&QNHEbOf}HBiPv%=AM`8;>NXbq**4Qj(qrN8rzU2DC+!T&?b^a<#{P8=3(3X}KATSC7 z!_gPZkei!}+#1cYv1=H!VbALe05mJ+03EKQ0CxZ_l*( zW4U3?pM;q<=Kged_h7OJ7ylMfaF(BTu9fHZDUAPKhf$gn>8<7NRbaV;e# z*~Inkoe^bZb+Cn$j}o#a@7->YeDywTS{xYDq9st{axv>seTpo*XLG~c5OKA=aDX#n zI>gz*VlHg}Ic!`La9`=mKuX;dWgDu|4s5B3%{2z3+9WQ>Vo`mnjAoy*kVyM|UfrI9 z!BPur?u89CWvpB+oyzZOCXvv6B%B0N;*$_nHK`F_h$O`G!6WP5h*z~{wp3EmQwqZM8cJ2norOJv|s!uW! z>dci~!v6SW_604faIflS&H``crh!3t zP2#n1Qmc>0@$XogCCD-cCi%hw*TYy|BV+J&TK z+ipHbH}Apqb=HVV(Rx~#+5Bx6XX9RH12*|U5l4ix}p`4t+1md(c?n zZC;q#V7*soEE?G14ove_#J$<7UGWjBW6;Ap5bBeZoy9BW#OK#+cH-E`pLin|dw6v0 zavQFyl1&q{z=PNy$6z@m0u5V{uz6~Bs-*WrYfUyfIprtx(#5>)@|ww9cIPnSk81b$ z`Gv1sw>XMbI9oJ0nFd1yPL#9nCo08>-knd$>eQjiYoCwXtI=H)5jA$(W*IoGPwseY zrjdSLSYWf-ALtvCCaL@ScjjCVXjke}wtf1zQ>2%BpQkwpx2#BEss8QX*ex`%C*g{03Q_q8BaUzAB z)Ix35+-I9JSK1O00fgJ-f5g@!a}6yl_{wAQ@=Ot#HqD4X6fAft)Y-{N(DSP7*y&~S z5x~_V<9ANxbFu(Z-k}u!%ZoW1fHk+eYEY-81K=5~HUI`JFyaf&S7S*7Lk z9m8G@p4K_M)c-i-zUZu-ApXmA&4o;mKqtNXk3704y&;gO0_@KQ@agGkQvD5!FF?O` z+#c2k7>ydmH3!v#g+OrxOw)JSlF=41GS{8PR#w_@uzB}$(H@7_4AKBuZfU&f6}B`L zMk)eUnw3Zw@{^hD%6jF#K@7z6%hku#VB(dkerGf>*ZEcEJ=~`8RqKNC@5_<$Hs^2g zMXAr%Eet75LMk~9llg1xI!CTDulH-(jFumqStgkF@4ujJOR1I-FftZnZk)Xg{S%TN zoF*q#eq2yK{m~a*W;f*+K|xg8(KedR?Tdy{Y+YPmIT{ILptzjp5xf%?ZRB6G_uCJF%4 z2_xn(8A=sA*WpbY^10cE;8F zhr;ugsentbyX6ejIRu*TtgNoDj)dyJIN9^ar=uDf0`WB0X`d1pBP#mI1p1N0bNgZl zxIX2g|NVEbk8FZnq%C%FB7wWH*}AtbcZICP$H)pAI(4%>hwB`ooxEla{CT;Bhl*(LV(Bt4hp4&l&Dcs0iIjiHa48g_leAAKF5SSC~U)n!V=e(0Pgp)IG zTFAorU2b&m8yzLbtfn-PvDAEV_1rygB+TdH-5m3`_N8-s#-K#Dtu{9D((4&LM%~5w zbHjG*+XILhn@TO;Ht{$dm2X z4F;OYPE)g8)8HqR+4-!)`IUHwrZg%VeTJ0>y1f>=LVqiv@z7S`XWb}yzS3P2<9Ix{ z_#nne^kn43c!m_@dE_5R1U-+q;uwCSkg}qJ{`BkOxaw(TfyiTYUY8a*vPEK)i6E8D zZSz4v27Wdc>`|PFg~1~DuTa!P(B{#oiMdI7iyH+5W6#eDI^%m{2R0W^I||Mo^Z#rudlJ=!~3H`O}sfE z>|0&Qr?{lJdmh2OAT}3kpyuVUGjs~239GHx$_vrgkC5eec0V2BHpSsA^~gEzpmteX z!KeE8Kvl`b*(cGk$ z>JV+3Z0`yWbJ=vM8mdEF+7LcSvR!&O>^36gu668?Vb%;=je>PKM~mlBAAX0;`7Bb& zIUU2MeZ+)nAzp3svB@PH(@f0tm9Tu5c0c5R&EV>ocfiYWBL1a9-2ysAxH^7E1BtWS z{?d-wm{t2%a<2e+-6A|rP1eCNzm3p>D!$BQ|I@jll=mtNH>1*AdsLR1xWf8at}&9vJEoU5j9AU=@nW%h_c1{6)`9heD>HI z&C>6Q?*fOIp|3m=r!Ce~&qz84lQo*{vnygAh0cWPUkso^5YT%XiNXUS z^R3Ad`)=BfyNxrC#tag8_0*M&6{SnH$cN+ABOGLmSC7j|BiztfQ9hx|A@BPQwnVgi zdZV&rn}%H)wUUS(afz7cohV87Yf`Yv_j|^9R-*YR8QmF)Qp9FibGbTkuio9~|5y#| z%2eS`;Q5{ZKr#I6SMrLDA><(8o-RT*!XFK2bt( z92oV+Y_H3?BTkgQ-_u;j;E1eo6a{?sK3u|Ge$qp-qL+TRipl|w zq(kz1MFZ6$f$UJ#)cZbpX|G(H3f*sj`+FVqIDvt~FxfiQ#oCLn_h||R^8+HRfxl3% z=`EwGAqZoOl%aG@aB>DG*mQ@B;WLMhM+>JN^n3SWc+_0f%ek0@Is0sIA@ms-ROLt6 zTXWO;XP$k{w9nG0)ueByN+sbGx359p%@&gmh$6uo&UL+Vpk5}z7A+;4rjBa=Y)8cV z@`V%4V0?smrhWCq{|G^8%lyjZ! 
zZ?lAxRX~H7q;%b2w*t8Q=I&I>U^<9;WQ&8NPUVLxc0C5jEMPv87{8epFFsmOz`C@R=w8!#omXDC4Q)Vs4Ae3DzhI>E!yvQK2_IRdtlrxn47Pz48yIKBqPGI&47VCOYc+K ztjr=0)C{Jvt^OJ`(@2x0+bz=l@+CG_b=#l8Re^sOz)tq#dPi)JMQuP`3KxqhZymsQ zfO)R0y`UO{k1)vatoDD2m9ZKLZmx%zYeco4CB1~Q6!1L;L~!`BR@6+4%q`budlut| zyFmlevFVFE^vC+>efMT3W6*vEqkLT{&usYtVjec@W&A#!bj}FrO~!5%nlHm#gL#h` zIKi$>H0`bD56SZf^jg$TryDbZYATx9tC|UXFV(IzbUV-OirhY(xb&6_wm&2XA{%ih ztFF0qohXsJE=M8U?6#dH@rQldw6BlblQpK+k4G_1+I!mb96fBLtsGB7VL09D_KG}a zzlS!TH0(g6WxkmYE(_5$2SFuF&o_OdD&tSrC+faFycY@*;;9W``zuKV8xO#rL<^{g zGv)Q}k6U9@oh++8>~2O;;gEToeT{qbb=%v+M{;(U4D9tL77BEG>K<#}CB(`G-(J(| zmK(2irSNDyh(5QbJh<_}lhI9h9ioNvz=BS&ANHT%i8g}jxREb5&|a3huLCbiZml7t z)S0M_r}xb%H!adM#n$UMrn8pInyBrLO}1T$(vlQ3GHLd2!?JeN_$FT%pfCtbK`T7! zpA3(d=Y`}T=-Fl_w6KZlZBn!nG^oYG?A_BlSJipy8XP?n$Kc^!{px=5 zG81=q465kr^ZptBfM&4-*JRIT*~s=^zT2F_QBFIa|DC|6ON2}P@rpX=F`8hUWA&t0 zUF7~G?riv6gR7NbJs6|J4jQ;CkUkR4GW01p7_LBdd%!tF$%LNq59t-u ztC~)EgAW8n3oL8$PIhZM_vr6m%eSps2xxh%ULNC|H~e8OZ{CN+wuL^+v@q?v+$bHL zFhA5*xVl>nevl%^2p1lzV*rzu{9X%3i~{yHP-+P{I81vu24&Gd^Db zS`)YWfb7K9s;9G=;?;RsF(J*7PjY*5)O?ViAz08Cm32=WJETA!4#D6xG8%M$6(5o^107MB-f(2{cT##N^n`+<319V zgeudI0e&Arm)T$?bGfCX%AQR%>S^s@yZYMYnFxZ^Zhb4+>HG1NdNS!#9X7^ebv75- zJ5x7*2!7GDT=umYLcavHN+08xW#REbQr15q`Z50r%CWjw^euON_!VLd?ks~eU+@t^ z$KE{xwqFe%QYUuA+bWM=r@mU?o?05(en6wzU7^T%-lzKVbw|$-O2F%n!*{Q(T=5Hf z-y@l^A~;w2fqtzC`fBCxbux3OH1&7>+cFg;p4?&KhWy%Wu**v@$bYe-;{u1#QNTP@z|&@H-#uR##Kv&$YX7P?M>v zrY}PO>&IpOwa6Ei0IUJc>xbe;)l0VJx>Y+pf9dC>H_JR^C}GQUE38{wAApn>EzJRi z;QtDEu)}=^cld|4kADHIRdKhz^kTe+{Q;)X%lm|Z|4sy|#eWCr-32`eU6sR0TcS^{ zs5xNYSD|a2gH$Qs1StoCjv2yt}acJwf7Fyt>KmOJ*BUca~Ii zKaHh^Jg3IsdP@?!Zu{e1SG=3azijmbkH>f1mlH;b!8WG-!pE9_PCgSDS8Q$T4GYPe zQ=vS<*q5A-v_@L#%-fE$v&n2Rdo2E#E^^&#`PKXtV5`R%e)Z<&AfyFK9r5f7!tm@h zn*h1}fMx;v3B$V}}v zK@yX76zW@?0I0THRCHs5Fl)9)wMS*WxbcY1xR#Ci&saErOz?tL5a%VNYr!|)dOwGT znEsnm!OVySYU(ApDtSlqk)6HnGiTFh%j-tOz;CG0(9p>Hin<6_q1>!H)uoJligX!V zX6N*8N2|NwKG8?GNUKi_^^@;}_oFuNZQfYBTF;yQn}C+77J;We2>=i>8AIVsk0;R3 zcr-jri6#jp6)agnnFOZwyX&o*A&Cb0h3rj~?Obr1gEyGr_WYa&NZ17O?7wt_rQUHt zeHhQY z?v!;gLS%z;url{gmwszw`6nz#Ym$q0SnmnOO1cxx6rE|qyk|rI|BknR63OGp|I5Hn z#5KyL8X1QLnYYii2?@~fYmbdbw!gbx_yAWIiUKR40$((>IuUG`L7RFD`SIbPwl29C z_XC!q^I=%Q!HR6!-EmBT@i1c=O8&Xd5_W=G{LMGh6B8N%AB!JC(ur1&^FcldS-D9iE1fy$LRkp?_ z4(b^{E|YyaMr0A@{E#Y9S;=X{lXmhA2B9|m2liOE$SBmu&}=XKzOZ*777LY}!?YC0 z;mp-|N^)OeRETbk3=t|f($g{Paz(~3f(1gYjheShq%Psb<_jG+StWpWTKuU}0pap7 z^7!YQxBF!-Aw()q4=pD^q`(O9Pz3)Zc-nt!W1$OTG~XigF1D?F{0COO*KyN@#V9Spq^$!~mvtpvT@p2a1!P*Y(Qgomi;i-q>)s9nh-7Sv7bpvf|8fi=FVD7+3{_+*>-b zeP_Y0s!byzkPWNG^J1u?JU*!CUw8qc1s59x1<%`O(-{B1>75SC)dV9CPVjv&BX>#p zC*H}9Y9e4SQgQpVP^WsMmV0RWf3~G!-m`%8faLQfjqFt(z@FPx=Uhr>_ZdT*+rx)Op{u#P|MhWI)F~DVkbG=krAE+Zh46TpxRl zGj(!>@)*0jP-E0K8X3-_&qYmc^&w*06+v8A+&=Zy#-Tr0)PdG>JjGx9m(PR zX$4S#Zt9KZIt7Jycq-<@*!$T5qB2 z!QFuHp~w9v_UbKhREFHE6aRP1x$iOkn|tWe3UdQz@HDerw68>FzEz4r(%n8bS2ztSQ1c?6!Q{O@sHnW|>hPO@5s_M^+zBwbLzQf5Y_(nq83kKM4x@1g(#9*V`_CN;HaOli3Iu zgBqM)1{C$iPb4O^xK$i?=P!{GC;AtrlInv}2FSmo$~J$sK=#wRP2O!xE7Z-C zI3ByCSh=_$85Z>D2%y72w%2GbWrIOl2HefHEeBXw zCff85?rB(@kvU0eScI1SE#m0ia(lEAhur#VCtKP|>;v!7fc&?6nW zr&b?m*-yr8R-;I0H=6C#eb6;ddj8nOP9rP%w?Zv!5aODFBcYz4$$RCuz#yQ50YNLY6KpUT zu@);;r!Kgwm(34F3T6X(qx26)H~1A)=@-_EmWvA7LeYbfdyfd?0Ttr+ea8ZVj;YYs z%DZzw4ciLS_%0Z_4+P=|BT#26&RcbYGr?>A@WA4Z@_+>TXyC+u!ZDe|)cF()v2u$- zT-2h+c0a((M(XqIOU+W857kN3R@I&444(J!K3SsqPtw2J0}zGu*IMlXYDsrdk@2#i zrzgZz_GGi?EtSy_AgRq(X|jWaLXewkYfI%dpDM=2vPp$e!@dIqmg=34xOjMwn_ zV1|jeIZYavTV%uHD#jl#DA?aU;WaVnX_%T$CybIIli$v3CPcsbyF9IqutBdb5i|4E ze|2VnP7eWb2~i?EDZno!Ao{ASvy1A 
zBK}p=ZSTB`CtFi2Hsd3RBTxGttn=+QU}+Q=loisViy{AbjpqSa`Ll8ki+IP=+951m z`H04Jl;}WM_xNf|iafXk8-V_Sr*0*qa?=S-rnyKxcMQ6_Y>lSk--Coqd+^cIR~Ho< zur8M>&w6lkUi!jj-hRD$jhUjM_`d$B3CasX!hWel<$6S}r+)(+7Z+QUnPu)MQ}O{5 z%{QpbG9l(1fw$c3EG}1+OWVzv?%hosWS+PtOUE369=~%u?t8@Jk6U{dG(IvyWI1wA zC{LSFUugJUX?UH5U(t<|in zpqSpt)7x>T%;DV83)mZOZr_;_PX8JnIz-xhBb5K(Qp@rXo-u>t>h_ZgCmhPzgV+5& zJxRh+v9B0^i)St!N?JP)rOp&lf^SPcYKTv2&k-aj4y950t?=W|Q{De^0rU%>AaMLp zhuY3ZDteaAtWq`S@A`^WTrQcn8a!|eUZxI|t5k{_;$RR2{=-rRYo4;IO ztG!r2p3{D^GIiEOV%aufJO0mP`JJVB6pC%LsNBJhIHx5FdneM=^;Fq>BKbc%k^s!$ z;YlD*agK_P-fxYU^#Z2A4nQxt*yy^vw0rqPLsJ1T$Am>h0FnSRqGCkrK{-%tu2*>- zaO2St5D>7iu%MOr2N$?nGkDg$MS`z)Y(#(GE0J(r5WR-bRaJdXt+HhEjN*$Z`Cztx zEZ1iqpU6g(z^BGhUrgm(ol!d0I{ktNLR8U z|Jf@@qF^sW(&}vd@xxKT+&5$pN1Zmp8~Tr5o!^4K4POl;vo$9d%N6q`*GJ9rW;x4E z{f?RY%ntV!yD$;|>vxQ-?kLbvp^U4Zns}>Y>kTr>_WZlcDQ#(b^k+*=j>~nh@yRtf z?P%+hC4XZWpASEgcy+>(i%(ZWA>r$l`peKX4;{D7eYs}@#{J_axa1O*N}*|rb%{Th z@VUSpejWzZ66SoO+gb{2ionh+aJsgiCh6@8a)cT>hqz+Zv_{p9Jj^fS#0}|$0 z!5DX)MDF2D@hY#%yNUHdZqMycC;Q8`-z4um4t4(w`VLkHG^wBXS839~q*!JLv{U{6 z5cQ}JcqxyQOQ|zdZ}smwi|$0eV5$;!c;8IU`Ef$;+cbx_Dp_HFoa$;|GhEdDRK^3M zR(<=+FO5e)K!B5^llxKJO9SrCf?SFdC?Bd?XPal`F1HZe9h(z(#oF+iIJGd6g)App zvitbvvTA9lzz;EB%ner%d|wRID9u4s!M~ln5Z57)a^_84$T?#xsc&vf_)5Yz|>>2d%$Z#@{ zIKPM>V1GA2(5@TYes#=}%~y5S8c6FY^Q=DHBgZ4X9|oWEjMN;b6Pc{g(Ttq7!gq8R zhU0p|Ii*D5f|$9q`eJvpJUf!EOKZkiI7w))zdIp15XD2{Z0LqECkQR`9!|f45Jhk$ zsG#D_J==krzvMr`gqQM|daoks!W+7I|0t&dum19(8D+vxS?=lJ4cFRXW!3Ks%q zeblf~%+Q%(RwvmKNa9O(Wq7*I<=$IYCJ$7 z%!mnbarS#7UtWTs{Bvb<_yF4O^71k?EFy>7Wo}4F2teyr``TPvJ7vc5=g%MNq>=u9 zQVx!#xj7~B1V%(u42+b71jPkkU`*-i>J}-arKG5`<Z zmo`8$0Bg0{`48(C}3_ov1g4lX1&moQl0jc4QW`arYEf-Fw# z>HZ=oIT;-T1LNbzt<%%5pTlNm05MPx{>P7UD&;B_0I9V`yQ4VAB~Q-nWPujAbz56o z5OoqY3rk}eJMuqS14v->{_^yUe6vaW@8EMX+eZ#J?z;>*TwfZ-u5^?-H!Bsde({b- z3%;pcOz{>@8l9%KHm~DJb|V#+{gH!1=qZu@LY;QEX*QxBa&xO;$6(dW6h$Kg^CK(U zKEvMfJ$3UV_@nxGyz$a&X68Z{74+n0wq`FgR-m~B#N7NSDC!GuMZ?L4v6O46QGMgd zBAaa|rbpQ+vmNu}@a(9m*wFad%x#tbzT6iRbe?C(H?`E)Ng~I+v!&mu)n~y@$o1@B zO2cPsHrHB@$C}pa)2$WQ&()H@F!`83VqI5L1>*2B(Asb7KJCPd!^)`DE_GNf-fOYd zQGmo;kye0p!dEKRQE$_{gN}qB+X(D_^&csdvfEW1$8C?$#OTw_v)PRRW58*KE^JPy zDQBwQ778jPxiESm&#(oB%LV#Dn8I~3Eh{%5$!T%JR*uD9q(eXFftg8tW_Xt0+n%;+ z#nz>>_nkRA+xXAc$F2@3f@!3Iw!hPm6nrjCzUVaX=lk*w8{1AElhV*}FU;4+jhDRJ z>&V2H>kG@qg25>GL-v44G^4eRdf)q)w5QU~_RsSqnr_-2ZZMHnB#e8FFm3Q9Z#i9% zXv=LcXwa*ci(g>Z{YN-TAwlq$?^>ay$|vO-n$}g#&GWAs9rG$@&5BhGRm~O^3tfVC ze)abo%#jSA>*?Dz2?W2Rg`!qX-uDswak$@rbl-1~rFfpVq4s$D()Lu{bHF2I+3HKl z2TuQneC5g=PxFCD;&I-3?zf|5Bu%|V z_Yd-U;*f^QgYOfF?$brO2846#oTO8$u}Jl7f(!Ej+!I5^f0UgAr)Hy|io>!RKF!O; zCy;b9r*w-kYF9X`$@8iILsyAz{*Rb~FzB z^os)2W2j8+?;)QP*l+hI8;Z)J(C?slvir|0+b4O_!CSyueRZgD!vQ%Ka-#uR^S4c5%@aFd6MJ*3{7O$o7{hsnKW}nVh@;h->5H<7cT8dJJP@ zW2sUmqB7f;mzTS{yY21m6A_}{zbB@on3|Y4w5<&%P!MoAiRtS4-W*OJOy;Oo%nJ$% zHZ?UlJ3FIN$nbbSHA<1s1}g&B^!N8aJU%KbDWPFtG%TM2*^o8prdGcX(217GQ9eIE zudJ*%I64AcO(OINK#zcgh)7IKOsCVi^a(V+^A}LN$YgO9DN(23krZ2fZwZ% z3VOzPHjBA}l2T$%k1s$8RaBhq4?{I)Svi_586O+t;pHtTC?Fvw9vK;_udi>_vH|Xe z|Lu9Ujj6i2I)zRf%5Q6H3pfd|0bv6K<4Hk?1bm&{-QA6iEQEvtq@RsVO;1ivL`6h& zTJ7=*0HVI&-rkJ{2m16OCQBg_K_-NPlwY76ZzidA5 zC*T<=Q!9&_%YG#xDS+rSFMa|rtHZygH9#TS#9KR}#4os(A?GnaDm%QmWU|qTbqh*b zTlaRf17;rOz&_!F%GQbRvr*gp{q<7G{$tW%nPD%p1?rA#Z3&|ACEn(^n%VQo#6254 z@DqjpO{sd={<8Pyx3#=-Y9KyQ;Ol5)Bw~@!H$Qipr{pp5nYlYB%TVxStMuBlm}Qav z^3DD6P-$lzO{^jdY!tCAUvCW#dO!##7AuA}txdj@*1!wn7S?c~wZ*OX;Br|p{aX73 z)pbCOYo-wt2xjdQH=o8X{@Z9j>M|1)h!TBrrz!MO{QM5xD!+;Py1y-eA0vWm0D;S^ z(PX@FX5DdSd;=Hb)byumY5G;ZD}A>{IntY~7%ZIArwUoVNqvphb8G02^) zbRB$fOTRv%Ru|GXH+cTzO!Z^KXQarF)9EBQC7|8KL=>j6>9bGAWAJQa*`K>&7Io#* 
zG~N>hn3=l)+L92NXggPz28O-q`6^}il6+*FVGIc{=s{A|%A>E@a(L$Ej0kSVW0a5P zI;;Ngy+Fn45H9%F(jpsv=O8Qy_#$jQ)tuc->5Tlu{gaVoWuwnK7rY%7TpBZuuFi`% zLfyLe>6$miM;J7=Je>%g9YRzzibeZRq z^|JT?OW!?dAIH{^3`RvF^h~bH+riJD{R}Rj=p#ZS%`Rd)9p&MAMlqvV74t?g!aoOBQMRvAm5e%pXYL8xcnlV_?E zi_QLWyM8psq*2o`27Qi(#vn~hF{ZY~+pC^a^igGS2=DD!!>3E;KpV|2D_JIo+cUN) zP9ZVN>xRX3Aw(#g-%Z!EQn6tjJxlYmUcUdit585~T?4vjTL<=uzl2hH#wDZ;)l|5h zrEOdMpMFB-lnhVMo1Fq{ate})z!*l-?njLLS<&tGC#_SXGSjOEVy|c!EPKr6jjRMl%BqIQR-GyTp-L4*~~Vl)RHc~ypIdyq_~gN zY(351>SxP)1Gdza`*1fc#c{00_N&NveC{Wh^Vf6*EO@UA1Lh{a&$SgM{gFW%)e9{7 zO;mqMXd%&!>{8|&+f}J$Ccai$J?dT+1@AWwvn=;lv|hNbq(sSKiwvi@MTy5yeBb^K za=BpwQ(u_4j;$a$I{Va9IR>QUdabg<`z6rhX7l9WO($Y%=kF#C=UW&0KS|Xge6dPJ z3_z1UnkkA#9MO1sdIAXZ06kuw67BZ(b|q|X-AA{kv=jtNlPaDz!$b@QL=oB9I%Z~+ zAcnND{e2Th$Lp09jWj7(5MD_)A0MAXTW4!)tE4$_HK3sYrDtS3dhvmP?SZMWF<3xT zQ&Uk<@gNPzg5N>;E0oQt7*6E^_N(URWlv9f-8r*C6p zqpvTosaZk$9p?{i!tk7h^8wlfAQ%85@(OuMa`MXRDv($L>lv_8DWDmfnJulXd;po4 zm;fqH5Mi8H;5Yq)<1ZK(5C{YZ2L}TkeFLk0twxt00Rf>&W89=c{cG}f;v;v-FKB3y z1pIvT^fiE*LpB8QAK)GYQYsc&FOgzeSoGmGPQaWU~ z;sMjVIt8r^mg|A*=dcAx?t~lZSe8OD;;LzS^4+B8?3L=!l;lJ|?u6oT8w#>c?JjLV z4tsS?H|OL!o<96>f?&V;!j~3x7xV4z76dh?2ER96S5qCPrm)quGx}i61=4H|3>v~Q z5x8Kl_Q;x7lSy$hQ0eXZzD+jXFS^hdlIm~$EY42YxVr`eQs||g%joY)KeW6&?GT0U zf3Dv}D!Ibz$r;T9`I(iT;QCnen|7z=656=tZOFZCiO=d%PQ00Ppn?3hZgTkA`I)(I zjZCTp^4O;+++HKz8rHhbeS?@l!;c00$+c9$kp8HjY^pIAygH&#A3xZ;+X8&#UaIib zdl9yX>Vw;-A0PMUy)znuK)K{Nj$AM*Op#EF-@|7VWiN8ur0~x=3~Uq!i~GVVbvi0S zHYg`6g5&P!MAAosSGP%Md95DPSGJW@YDfx07zMzKJADHPBx+wM=(N5TuF7V1PNgH; zK?=wpHXQk4lSGDaJ1ma44VMa=6eWvsM{Gzu&SD|>&{>1o*+ca(`mkY;Zt+hW){^oj zV0v34w(RKjmJS%J^&e}IsUT`SuI|T~aN*)#6^qd?RP)z&@M$fFv;lZH{M6P2-)vsS zd?NS?5Yw1^r|UcC6uINQbg>!DU-w#XZ&CCSoo2P~d11Ml6YnW%#;}&toexh3J|Ojt zc=d|0yNW7XV0GMU@Bei9P!NQ`=P@LnZbx!_Kps?<@HR(9<3rr-^GBlyd2*L2o!@m2 zM`k9FW~f@?w)5qnnnlmWBc-^syVXQApRdnk<+|bdyq?4LsS|}Ct5P@aQwMvs;4ju| zd|a0G$L(b&bzA9fXS-`;PwP(?(G#cEfgn4mKGXUdTwT<8gEQ`H-vS-Cp7w`%r?Y5% z1_vd%x8h0~d;2}>MBq*$#cQmLUziSloy8REuh4oAE2`W5DS79ww5cD-ol~MuR#>DQ z^B(?o`$BdBy~w2F5%w@{c(|-fm>=42ZQb*kqanOvxbCgFaYlNb0&eW#=Z_|*leDqb zZIg>`#lr{A8QzU0@tx|-<@*(W+65ug$}q1N$$`Pe_@EYAuM~2?$V8ucbAv&thh=ZO zSt3llCl@P0#mdk&+cB@8hAP-`sbXx2z{Q5zfV+#qdgG}ma^DtNrz1o=7>B_TOE%r* zQcNi-{o;(fR}Ce1Xos4t8(o{{q)YJiRaDYeWbj&ucm2KKX&E(${3gy5#Shs#vWW&n z>2_o3?nLX6>3vIxFAL;~8aItrno38*ncs1XBM zB?+!PSrLSx_zUxGzg933J^)N9!+O_jF^fs=5ehT|`7hBfhuz2s=ADH8wX#hlNd>v7nMm z9qOMr^&0^atdyjr+lx(Kz{CYmLba82l>XCJ^~O#3e)DOvz%3A@FW7{hART9G^qFpyR>^$jNE)n_Fcu^G7Jb1CSJ_< zpx-a7r!OSVpEG_J)%36puSUb=k0ctx`95u1;30unz!eL0;h4L6Lx#geFuq=$+ZkG0 z9Xu`PO-BXm1eO|Nf6Imf_X;K;fpjq1QT(fi@59;XBNi<8+w3 zjRh@XMe*k#>zs30+aKrh{V$aoCY<^|D{7=~9GjT*60DJ&$x`lW-Uvm+Eb%+f{UM$MOk2>@q1g|ARsxPyz&Mof88#IcaRXeW? 
GIT binary patch
[base85-encoded literal data for a binary file in this patch series; not human-readable]
ztAK=0!sjeW9jAn?rm0yxyHXL}((tWsqj=gf_C0`60=T1xxbeTMZ%i0Lj&e4@ztb4r znTnW>FMOF(n-NRA?i1>j4iSbVM}aaVlG9&3zwKO3E4Z{tRlN|lLj)UK(kW?cmH2^` zZ2`7`(}=L0*R`Ss?Or6hq`xRw?P)?SfU5akKeGHulr{E-YT#&^g^9}%;HkP_=8K6N zsx?`g+xwA?MuBLIrq|F0x9QuWe(b$@V~c+FbAKRHdrTte!f|T);dC~19WunWMaFsk z&HqH>%-RtX&RpE4RRh82*RRI0FNC@iOY`Hw%2^djgdotxdr+WZ6sF3RH3OoFXHd)t zgSS0ds`*B+{)`(5{_tv>Eby>s;rj`$0NB1c>L*R`U{sXgR@HEx4O=!ZvbQ8HjPMKC zjfhnenrdex85*_S)t(9m5rnsM2xi4b|MKDW(ee7{3hX!#$T&HKA>6XW`oo1Pil?SZ z2CjIMCzQxa%K{`)o$j+lC#sp-SSSpy$U#H{m+@BM@bGcl{`oR!||%%2&~1Y-xB9 zmbgcuD*DVwlnw}lVqPyFMJ!FXH??>iHD26YHYJc3{kkU0H`ULKUlM@XE~(MUN+SXb9m+#EzM!eSE#w=y^x_!5MY>?0mB zr?+er)@cmdMb{O~^C#k^T0C{6%dugmQ~s#f-ap9X6?J$h-oO#5ou$HH;wcv4!rDqz z?voqB_t8ksrJKZbprxgU;M3%ce*e+S_ODfsE(N+6WMoHqcFeNJyH)W3Sd^m7IcPVw zA)sPaRu$NF=;C5V?-g-0S&ME#_EdG+YLoPQ&tDTJ-_H>BtNk!qupFf5Mc$%iLv*>0 z1s*KGp1sC+<(fCuwOxNNxbhbvnu`27<2B=@Mx*a}WtvqLku#`X-spUCKVSr-_alYE z46m=(vi;e2C%d}a+wmN|u3tR`7W2H4xlW=fj0e8Rgw7_B*M*jI@>k`3VfMEy;YLT< zkRMnPPxb33qv5SjrMDlYj@^6(G@mTiNrZkQQe4n>5M=-B_5=)jX}$v}j^qWfME>p^ zQ}AAGMumBghS%$Q03HLlQ@8_zy5~1RO3RC5+Xu%u_@C@7z_IEtz+Z9b80?f>%d@RZ zj-8PmF^>k#kH#Ecj>zlycqyIxj0(ep=JNciTb{293m=+0-1L1lTV~qa`V|$%5{VeU z8#yvt3$mMResA&<+VfML`lxa3!8j4UFS(#;)M=Bp;G2ur_STZ7oc7BFK|}5FR!{ZW zq1j+6#8}q+iQ+XS@n$tP=ugIB_L05oxJ)bJ0#OCuyKI&r$TTJyE==z2qx!^hdAD(U zf1fxiEnlC$2;8LI@7Npr*cg+%yo`45B*t#ehi^(z&f0}S6hB^HeRjDUwSu7oV(0Nr zw_T@JV!BH79Z(>0YDWL={|x3b_vYMUK&j!B7o62S{-2CS^#odXW@^P1-OiPsSm>%cyg(kDS z9ICpTbSMTHgt%s8)NTy5hILm|7B>i#FTJGKiuxq{s{E^3boR&HhCXZ0hb~zl=;Hmb z)0O7;Jew<9R(Wc$|Iyu}oU1)$l(X18DdiPEJNn7z@F_scxq9p&umF|+KU@Huj2DpM z3yUg@^65X{#1LPA=5Q?9ZBab#6X@&?u12M;Vgt%oc3GKerybv9Gvs{1;WA#7VasN3 zk{@Xmzi_4O326YYw~ioFIHO{NSJ(FT%)5;yBM~EY7F0DS3-;+>P|wcKLe&hqP2a@p zm6}`NPS?|(&hG0f)kw5)x*5iw_0NzLrb#NI5Rth?k3~xT6t?lEX|r7VKa{-%P+Z&A zHM(&NfdqF55Zv7@KyVEN55e8tJvhPLH9+ImXmEGe;O_2!pL^~-_p7h|SMR;5u3}TW zsvCN(z2=-_j5%iIfaK{&o&on_dPb=(|A=9;?AL4i>8@3QK=cRou74_qzrmra&* z!A6~j2EoxJgw`^qT71&pOYOU*q9}#zJLd8kPnLc!`|9wY#;#2QKzYUAAJHV=X4K^G zNh+&* z!el*Xrzy$dGGTUIp@myNt`PbbcCc(LRK0@_D67h`JZb8V91&DvX^Ho_+7wxiPVvNe?GSPkK zX)n152Ja2&B9rCUSjI`05qC)$A@H?HNJ`B^lomS0sO;K9({=6XCOEA2+J|XLzwU#e z`ANJVyqni-N;wJorjImx!!RTN{pJ$0h22%MxPEYNSo~O2vlsKgQuTVlewqT~bC5Q# zx2)*Ko3{mKWpa;ae=>D$|NN8|$Rry+xJ*lAE@TQPy3$5hSJ5)}s zzaNj)`_a(XCyFXR4;hSjaA45G zd~9`a@JEh;t#ciIPto#h=JsvB<~bc1I5&E}g3y;(;g05aN_*u7?eq zERgse1H&;Vr#xCS&MR|O*CtN;%fP`cHYS#~=e$=FS8nbH7?|hRCx@}F6GU*kE9VU| zk2k#7D+MOA05*TS<xu=*v=+D%pQ@GB& zBxM$_%s&SBKY}wPiQv+U6`0Cy<{{S1BuUC>%@D`+F%w_T)5b@-kOo=mN6+Oa)dO$J zE6n8ql9OlJt+SdjuPnD_r^rCkm$c!zCZQ*r_1oN~yMpUevi;}LsMD;&r|Mfi$&=q1 zA&5E6E(lq9w($SdJAdb_N4NIU z#-po!2lv)C^$OqfO-Y}}WBAKd6dXa9i>>{O3tHX`PES`^SJxkPbrA0=S$RQS74oa| z^K^Ea-;0aV6cl*;-Kfre|g}@+Nk6ak2z6q+(wO4nc@H2vLAnmqPn%_ba6HPUK~D zMx24$1`~pE`!Ix2qR+Eo<{mgRD6+#xvMR1P6tnYYKnWkbNy1UqAsIZi5xgUj+ju-V zF&cXu2Y|gE5`n4{b%)>#=+T%U9+F@e0lY3Rk!=Nu6HL(Kq zVN5+XvpC7|l%%O!$(FP9ut{lt8+HhP4=)buhDg|`Y9c1zJ6hqx7hln$oskbGcs{lM zN+Xo|8ghMwlZlJ@XJDdkmPGe__CcjOZLYu>L%oJTddNV*)J(cR-^O3c#@T$moP z)_SuP1Y((Lj7VnjxEX(GZH1NRB(-3a6KFh-69^(oe1iz!;_3bzYk7W3CR>Fj5Y~u1MHf54*De;8>n})b6eKD(6fv5(@uq+4U6%Vhrg8DZEOSzGTTwa)&Ek>NppW?)xiKsQ4c1{ny?@Cl>uW3?e zJw{uosKkTe8+qr^X@Bl@`mN2qCa)B-HIU*pz05y8kG8U%K6SY4MueT!7sSh%KqfXP zQRJ`Xg1Su>PSdiK3FS6XNe-7%n%0*L8-hwdx|fu?v^9N~37+)Nw6HSx$mD6|?)ZZs zTDf0SD@?`ZrSd@5p&=x(QtUf+`>Utt4+1$g2lwM$i~8_4Iw1p;`UC0Rjfz4F>cmUq zx+y;PE4>T|as>i!5rJ{h-QhGYi?61>Fgml^sbHJq!F*&Ha0EIS71#HlTHs$t4>6g^ zSUNNQ02aw><3q-M0j1k*rIB9;go1~d_GwhiL3ld~iilK}%_^%UXLWTH2#^{c4zY^G z0wDC_@v&8q=sQ3*HU7_^jusUdXUoezPVYHMYI z9}!0wI5-Y>XHpQJrD!Tew$Q8@ImrI@grZ5?+Q#O!8w11Od#-$zAPN8ijVcK8Y>($) 
zY;PyX%SS~E31(7AsX&rX0RXm`PVm_p4yMC*-vm;wyc%Q9Eh4LC@McPy`lu6p6v1<> zhSzs0pN=Qvpe?X(O^YjPNXrF@S?4eDx;EL?G95T*xR~pu3v4c7VIerbR(t$mt1*!$@w|FO5x8lpxwQ~#y-hdR_S=eVvI?w` z*^60+>2md`9&WPDdoE5&92`{kd8MqG>!-mmX?wQbE}LU>gawMdFbbouDu%c{D+{YK z))%Pk9#FC#YjoZ0J?e7$pvaIal5bz!eTu8D_GZ^gx2`$@X%wFCO0H{|e6rlQcvO(* zIT^j%h6#u>XTy3Asg&CV4)p1`Dw2)q_`xm)AXR3=<*g(&*UMGyGrJNfcS3Zjv+6aq z(OU~Co=TR|FX&uu3m=FvhS}8D+=ba&-c@*48~@UAJBS(^bo3J=(G!T15!tl5TDIPj zQ5ge0zSLd4y&V{1)VNusZPclYXLtzQSC zDcdd+&o+km2IZU(Z|N9t(aq+ov*w1;tfizL z3|#V*#;Ml#gDld&y`<+`yI_8X_ep(PV5eFSPRe@ybub}+On%kh`M;A)Q=Y68NO@vY zes|Z0wtBB8uCXp)2~n9@uxkPk%5kaA;&r2k>EYfD^6XidANU8r$U&Lf*q~K4ikHst zaGyb1BOz|PIH8}A0U{zD6)v+n-f}g_w2q+a?CgA(({EE3`dz{K`RO!tr7gyJvR zulqX?Eb_I4w26_JSWrZyw=OXBxV?Mq*c9p#M7 zqze3Io0D$f87Jm|J1QxI#z#uo{g5I1+up+I-5^>2iS@%+iS(3 zzvao_JFR+GHPZQ5+&vb1Zmx|*WNjo|wH9khQY%!42G zx7KSYv1WZWFVCcoSi8=ly{&NVYDTy9-d1E%C4>m!%{duz14y@8XBkU;Nz_D-gPmXg z-UOqqe;Vj`8~SH&Be__xHXvf3OLGEE_UX-e;glHz*g9Bl2((2 z>k{i{ymsZg+#o~w`diUdw>Jp3xlWVpOA!euXnV0qbZ+|Cyf#h0!iTEnT8*Xe3h{_9 z-yb_rFxWmsND)txXEC|)e36HlZ&7Kv?%2Sq+3<0Ftvs5A5q(!ro|U{3-oNlP+amCt z7j8+O*7@aJwwMR-I)qY+_SVlWV!zDO&`Xbp?x+w43#<)SH)o7aZIz@6KGSH43u~Mc zM72iKyStm8uFc^PKna6Sdv-ZP<8>4~7N0DR3k^>BGr9k4whYtF&{Q7x%%3j~>80eZ z#yo_WxkWpL#|b_?k~+Rgege%6WDsx_!8dpJ$zNR<345cJ=%Y5jj(hXZ>>H7kE9^fc z)W7?4pwgx0y;acAhJz>aco{CiD_5elMY#v_U3`-r{Y%-eoUm{9FJ9VgZ< zGj&8aGGvNJT-t*@|{n?`wr z=e*9VLavgNn@Oo$#X_nBj`Y?Pe0ShPy$LQ7{tPX)OSuHg0wH?BY=2I49Gyrg*effy zu82_13cN!w%WyE8k|@0`P3|iSqNp2m#sk!IVgfp%B>nP>PR(f(Wcv_NiRRN%w2h_1 z-*0`Xp_szN8B1451LxB)S^NL?G=cXe22AZIw!NWx3@<5rI@NNsN^vk4_iT7bO(K?| zx+J(uSR05fzB4Zr!qYNkTBHk>>)|r&M-;%w??H~3NC}66hH-bpr2~mHg7R_@ePeS4 zNc!*v2KsB*RTtg`2)$g8vcz=#kDwPkGg{GKZ;Nc~y0*&sX<%LDJ1>BqU82Cpkd9%F z%%alK)Z}twH_9=V>CBfgmt?r}3+$|yk)b7n`ZoMc;D$ZG zT>IlqQuA*^<=WKPkDInQI3~V{Ck*j~&#hPBM>HFqq-?_sBGznf`=Gb^Rz$E+$|>oU zPL-4%Pi)X5#R(-ynLe@*;7-?MPHx5a+NK^H(Jn3l0Er)|D&+Y_#Quo-20}x#XM9(h z^UJPDVL$jTPT9@ZjQkl=#kDQ?_G?SLK{5(;K4O}*!<7X91o6Qj+V7uE{d8+(C${G~ z%UikmMh5DeBo;0~q=DA|0}}sN-ZV}gGUnoLe-kbl>#{Oej-FIhPRmpw`x$qCdz7r9d?ee!q&I^c)7GhfOelVRP;1MeoOUTWfYqQ4eKqGa7{*zRSJW_(qMu2i# zR$l%EhEV-)-1fx*ky#m7j8a3Lsry8JJnrhMrQsj~%JChzJw5D$2(acX0AO)3q1Rhq zP>^v1F7lvxo*t}{UnZ8zHm1C^Ojgi13YwLOnWFh~CdsJ5>|hyKK^5)f=7of_r*6WV zHYg6ik;-2H=QQIEj9WNnPZ1yiyd?KBp!zULssSsohlQafx8{`C*q#1wz6t{jQc}4RFlLr7!hYZ;*WcOmUA? zPmku?R z#8D}-VIg$~#}Q;HW7cbD662eWmMQEB zXU@Z68wv&@fNWGA*aot<`(*Fe{AmKnTkq7PbQsbcuKNd44>hgFEu)>J03gy2c<&3# zYzhU+=)Vt!2q{+hrZ#llcj8u}!|B3)(J>j3EW4N0wm$M++!U5_eln@?Veju!bNhR2wx5@f!+Y%Q-lYrqyaPT@epOWD|0XQds*47UjFcC? 
zUfw%%a`HDVdpcpIrxQYCMz&;gSF`*wiJ}lA-!H4Hnd0IF*v^ogd}eJ8=RY5{q7E-$ z2Fr{K+7dyU{094Nd=C(=O3%1|W0lvPu2xXOTL5pPt|i#26zL!=gh}t-5RtzR5x@b$ zCRc+ar*fM+FE4ZIyL3yK0DXnVS_>gS4{@i3&$t2e7Zf<1B1$Gc4Eg^osZt5Pj(&3@ zv{5xLg*2fDF~0uIKgF=TC7bI@?yA3O!j|y~f$QncA@pkw*Cb3|+lzM#9E>Ow0bn{J zIqP$8l5zfvC~^9LJ~}>jGWg=5I(U>$RQ3;}EKfwBK9HRK0o%5?BugQJLV7*qpuU>l$@)=JxO7aN1p-k0v!U5`jkZZww$0 zEYz>R9M)Qjm?ChA`6zNvV~VT7zhh8c>$0q;IS6o(I=EkKG6{wKETFK`PST>9!*f!d zo*XkdW57d?yC_HTY0W=q3skIAT?x#-+Y!7QR>m~hDK%5)@+3eqS}M%A)LU@7wq3X(;V@}?MZM_!L~uI{ikBW* zoLi-~l|3~*9`_4@#4%(!F3P>K^p#_3agQ2M5tM$cl^m--mF*7HX8_U(v$1XK_F+p4 z4H4=h*clGiqlvXd8Ilpg7bZ1h_+D{Fv5t)_wV2snJ3{a6nZf}n9-`VLYS+C%B&Wq} zSkfOb5+~>%;Ox1a7(V?_o^r`AJOvU?TR1G%2veB9A2*Q;jH1iG#Urp%NqRcTl@r(T z%DHa7k=!wcf!=s{OFl2=@KVs}(LjQjL-1d<$-lAVVn`aoWNc)q{ks2by7rAUFAo7i z!sABv{`$3eymX5nsgq=gg>2;Qecs{s?r_eArJ~*C^!GdR@0qUOt0AnrXwGK^p{ykJ zMjJhQ%cDP42AdHQ(xo$MYO~2|f<7;~x)mfWA0WuqoOzv*Q9=k>R*Sd^K7MMw%^I8K zS4UM<3K8`8p;wUN`9Jmk-s*?ijQ0$#yx=qZQY=is_g_3-_g_37Z(Sy|MO)sN0Bvz7 zK#`wH3x02AkQl~)&pR7YWbNm~@UpCcZ+aH@FEklF@2YvoHy`2!uMxW|w7jN7JIoO8TqjcWtKGZ&b&ji4 z_-nlPM`3bHo4h?;L0$Ob=J&7-YCGCxlj3N&I2I1fkjxq|Ud~^Ym6Q4{?xg_JIk~M( zZJvh5W*z@+C{v9Wj&Q7TU66gRrKLyf?T;PHRq^9wKe3R7qRe<`Us}r}bew~F=yBD( zMkSEe&5wOdTjGT(CjeScw{N4E=CIk)Jm#(&li!mqLUlTssR4FwSs zc2*00SewI!1d|+uvCPxE1s5$Q9sXwhW64`H4CRueF`?V#vf8GmI*3H>LzDgU)Hh~G z)6zqC=+ufjM0!d~R|?ig;o*TuQq^@3#|FXi2~Gn~Y82*o2c=Fs4Wjz3ij9YYQ8W;! zg#unTD~QU(jpWI==O8g;<*&w%SEv6cN)H-953cW3tIMcSs6aQC2~i%~;^A$!d-GoZ z@_6gRs|@v@#yiyW^-Aj0aD=qnvQ~1@URp%vei_D@@RK$#Xm&L_J#UF4UYgt>3VtFJ zO5QfFB3NHQPFgL>u{&OzL@Hx!+8p*-lJd=ZgN2`!Qksi)1D~-lq$8)j%;1p&=DU!~ z+{5pU;VTEM?{HlL?ps^l!3U@&fy2ZEhWjXN_KUn7XCdG%Xmy1~2B+PG{2I+tTvchBuwaf?|uqM-!j;D|K-WUcF^tUe}P0aqeQsT`?;hc zxn>C3_U8MLXoa0!6pk3dgZ%JmXFtd8^vz|2FTpN#FkGuKn zzCn9%&+P_Xu2@~s}!FTZbL$;(O=BAt&Oo2;M9 z7+Ah^laT8Zx_*%&pE7#Vlc9o(xqu-^WUAl*0N!+W}i3Vr2WOhiu zU)nsiI4LVi8*88EGj&FSPs;y4cZzGubV(T3p4SMz@GpbN9ai@8KXfG+{8qpfFgo!) 
z0|-8rSeKR!FFUgzCr<;}`)nrxYat#wf?}WDBsY8f;|`-tlJqTM$pWpzEa_;-o;;Ve zmpzw}0;E?p8Gl)@j=;c z%d?E3WBTJLodcAwE@WDA^pXvmqB~4r4)=Z96!DngbNe~Jj*8U9E*+Qd5ihQ@qD7B;V~Tydo8or9WzQm%8|wWoL^7}26nCh9`>t4DLjJwlug#i%*f z$LQFFPxW+b8&{WJM*n|X+iMJk@31V?%5qmzg*+z1C&0n!`5cEUYhH9)_WR`sf#n0* zuP4pjv2*WD+KB$5D(+vRl5KXiI0>plX}v}?N2YhPbT=g8zL=Cft}p3z!$JYFUZb-K zrwjBN%x1-ds)|hUHAsLSNV!zb|jH$a6Z%6*2GrNz${C zt=T(y>%CIAoyFby{B(tcBJqB5FcyQPb(Q&gjnOhJf*i}Wp=O;ej|1Ka#|1X@by|%{ zpcwW@O3NH1XfxfukV9LnJ-X|#E@1h-wUS#&UXvc*=X?^}^g!Sd*-vWsu5C+bECp&a zahj;zB6O-0m+93lQl6l^9uaCjwR_%fa~;En^Lnck0<}09v8P%Mo<;%d2Y{o=-d=)F zRFzQW&2a+i4sAV(?t|q|tHn+QLuJq7E5W|4p&vnLjw_S+r+XCw?F$?smfX=*I%6;rOM+YUZS18 zOLGY`L=bec)y;2PiPCGJwCf9!mtk_pN1y^}lTewK^mFT$J0gb@?HcLCJ`J$pZFumn zt-+*1f$4zb0!qq-!(OU{{onnLDNl2B=emxC6vxfyk{6xtgHAr{m6xHvzhoN z6IG8aiT@}{h!A2QP$w-C>vB=Gtv}PcX6kTDHKt^f$>ODe@O+~6h|>Y(_Nn-9Lh$VB zNy9>!*jbMliJKI01}XHQ0Q3VZMK}?; zg~g#58XFpKi%UVXN_9@tZ*MJqPSlq3PC}7sX@B_ovSJ#ZVoCT?80y)#3(Yfbd^*1L zzMe^1)?pkIxHVkyI`Q~oXQp5Z2+K#H$-?>%;{Dm*EX%-nXy@Z$`WQhM8?_StJ_Z2k z;By&lGYL>J$vzoa;k_09H5|G869rIfcfI3AW|fCSC}?$+S6N@2UC8G5_K01%03Cx% zh!HFtC*wLCTa2%nDXW>hNo={o-=O37>0J%gu8KW|a55M@Zx4+2v)di)xsfsWgKTe1 zO;WQ`XA3jl0YLeO>xWp$G0{>B3|-o;_!5)Cd)b3c_gw41dAXzgLLGaT(VXxqT&RhX zUj;&MiMN)lCI7HVhda!)dZ5`U8J#Y)pZdZAqHM)WFJ}ir2O^6sRXbL5S={p>m>4M= zn+6lyO8SRvc76e~mrF~=t4zgo`EHl7%{=V)8B}gB6s9(2$L(UQ5XZ5DjRhhf&(PUL zJ{S>n*uP)*pT;RG5&cR3etjP5H)M?rmjCb}3o%lHJZ`Na6CoIK&l6trksMAWrUF%D z)Bc*8gL1)>2c2SY$c8Ohv*c;q$&(|evy9*4aewe6NDtYnQJp`OnrXz%37jIk)h96W zp}#j+JE2!07q1{@OvUeQ+Q=O<4lFXUpp;Rg+pR}ch15l7k==V!gU#;p(&7BcYG=C- zn^Gxe)eYXS6>W~>AL54hSGqz=%xV}s=2JgAOxY9`VAZ6zu(vL}13_jj3Lr&ChY$aH z?DH&*Tp3pKo`EAZ?)4?BuRUquPs~Yz&cOEmM>nCaGwK_zjE2g_1o}ma-M{J zt-q+SS*B0JDx5r6@VMF5bM7r)vGW={Bj#n|Z}y*Z@_s7RgSZ()6SSQ{tnM~vPavw7 zi+=FhqB+c=4dsggf()8Hv1GSC!~%5H$#e->PXz4Ln&q~$7u9qK|)v_KhRqF~Vg{qM~{_)eAq`|~fX z#T_dFS;t~rKe|A1w}mGE*_V_LMvY6F6P3$9((xm!+hLjCGz9~!DO+9IbfT5G!M*g~ z_P<^|8LOhvZtY?sr+pFGpx3E)PW9Vc%*1NPY+w0B_dd@6EQ>!I>a{QR<&vkQDn;4H zqi^(A7bkmX7lA;^exygo#AIiWjn(VM(XUtA0Ye<`TY^F?Tu#?FRZ}pD@lw>qY|8{B zQJ<`ZtWNLv%s6m?euzo;(sJJ~{`1i`ZI*`ewXimN+LmPk*@_7VCkiAJrWFRYDn57a z!x{h+^3jhzQ+jZj@SL!=*WXofUiYbS~B{iM*-ILy}`xhhfHXPdahhw#f#o1Y1*afu(#VSPY2-N z5^C|=*akXfBeKN`91BFSic6!%YanJvgP3l8)v0~i-5X=lP?<C|^*!=)acM&0?|ZMRVWe4BRw1-&)Gp;EeGORzw#LE*tXW--|D22I(xN zcpoG>v4|?trpb7yAd9c5^{pZ_U@3dlwc-}8ej_)F0^o=Dh^7e`=J)y#Q z$@+cZBSn4}Tgp($YQ`sNU!#E z{Xk<&VQy7514IC8;a@7Hoe?z!apWUaM-YKR4OO7RaIVI)4oy$8HL5(I=L5;JnY z-ZJr7fCEB7rzW}daaTwNV4*ZhRrJL!WQ4gEL@ zUduk?c7W^JR{3i3vp97evLSj=y_m?K1U#`hgr@re5WI&JXZ;V#wAm6>fyJ~r9vzC*jZ zI_9w7A=Fkyce;K$_u|UB)M{>pgce7Nh(M@>oI$rlr8HMiv7bEAFhfM$<;ACRcLrFM zB;;)6L5tV-j;jw-{6fe8UUcJmSAHP4dK`RH1F_Z=@~4hQTzZu$yy{|4OF8?~e_&V` z1+p4@I~u4+tqjF312KtE#t?NX7&ukT(8y^azs&^+g-bjsI4K)|rq z2n}1PZ+(ZkrZD2&7Z;7p^Hw;glHHxVra4Za7bP4UP%Nm@xSvO-Vsie&CKZmRI%Ng` zzj|d^;JJ%|uWwS}$HI5#rh~F*gmaWIOT;OE)e{=<`bx`jx!4tmNPqXVUWrcaTpFNc zldIL$m2y=5IvXzJN#Fbx;_gJe3@Q`wkip)S5GdPY>+PM@PKYa=sGv}G8rS1^z{5@!l}zAx~EF(t6V^lV#^S@Pl(_kKn=?QK;(zD>^RgN zKyf$HFTXm0NU0j;33c^#+;QsEBXQ`dbDgV5{pEtzcT7czG?=`?s8OX*K>SCY_Ez@p zP}y(3eHEH`wyh^Je$@PXvbK~!&_eH`N$p&*+(sDQTi2Qfvfau>s z#F`Ree)Zw6+ZR;>8HGngHo&II_QW5$rz3DpD}K99GZ+`}3$(?Kv|aniBFa zRy>C%IG-JyXlV+lqdHrIV_N7#HFt~Gz80sA_SO7sPtHMlHVR2T%Bk{wh!Zu*8!Re>8E-mKC}$bv4c7y9 z@e}F1@gn7+I_#u|D61f0*?mFf(?Ac01R+602km8e)yb;W0zxLD))93RG~g=>7c*NC z32g}Z3*)qiN>Iz6v9B+W_07JIqf39Z-*W_zL}n&9DDubdQLRXEF7M4!&2mTi=y=80_UtuxpbE=TpQG{U_We`Wwb`13FU(Dd5S_yqWQuCfzP~B_cz%hW6 z6_vAi66q2EO+#9mhuKjQ)&(~_aau`IsN#6(mVzcjLPNk1St`^1p?R}vy2S);OCA#% zlN%)BLG%28=2Gz{hZ 
zkaF94%<0`ei$#6B`Lxcz7MQr!=_)@KKS4T9f&X7egWw0qw5@-VjK41g$zhoryEv^h zSQ}9z(S_Qb5B|IiO)QMSIDN4D0>#86cr*H&91}G4=f0*e>SX$|@A$ZxNTcq`P}#`B z2Fjn51O=p80%hAGb+3yBg3pf8yqEZ(D$<6Cz}puKMGBLBNj`c1ChXFG2g)>sc~jZF zlg^)2)zwMgSo?Vv$LU+(-bBE%jaT!XtIk+c-<={sG6Ak0GpMw|CJ|zO+9|1lHN|h) z{L3a`X~9&cns~boSI?VBKuo6t4yz@R1XS2ywiYU}=aTH%1R1*!v3c~1gy4&~`TFk! zd#K>5D?GqS#0c8ppc;@4?3`O(E6x#tP1ti=~E&`0?;}$6oI~Mce0Mcu?D0Eb0545>G%h z>G$l1GwGcCo+fi8Hx78PB`5q-emn(`ZSQzZL;4<3mEnyJ$Bb(Sy~{zI3=*_K6e_^1 zVF-+Wub(hh4}+U|vKNGpBGh4pl7546=>UB%b?_D=4Iw-2xNvKjy)8=*!QyQ}eDtL0 zf1ha7|M^7!{b+kKB7RF&K)hSK&4hP#J8H{n$aMR9-TS_TqJ@9+fJ8=&@k+1SU_tnF z-zQ^SL%_{P_KSrL0%Y;vq&~*NB#9QlOj8mfu|{gEGt_gi7@eElhrOK@oE;V`nQV|@ zFd*HO*#g+5vAg<(+Ol-Y2A~|HB^$CHOfqH!@a!BB%^Y(81VHf@Kph`^$DAW+qJ#(q0Mzs=;CgZ_SVHdV z3hTapaD9>Tk42&WPYK!32C265F;rz*Y#~$1#La%$qKFhQ)&yAjGfq{ZDR2MyhfOx@ zDVJ5V?`zX^gp_4Qvb(N^h(0CV zg6_(x*{TS67kQoh0})Xhh!KEnmEyqQq5tOndi;?-&j0=O?^W)XEIhf~-Eq_5i5>31 z%2UrYmlZ0d(N6P*jJU$6w7Ol;X^wh+xm0*rwIcXsuiQOAM$kKo@?*W`NU~x@Xcm9w zML4Dc<6{LV7AC-;OegEiaG!t~Q7W=Afvzn!^eK7#b5){3^l#l$bf}MEDDjwd7#>55 zLSHCkOti>lW0lA(?>`Z*f3r55#l2Uxz`G?dh2zdjTA_z475d(Pq!ag1Vb-SswcA#y zJCc5HaX_wFqa~hx)H5~vkE7%Mx-~3K%6_g3|7EdIb$fJcn9r?JD*Udq zD3aem-D}UwBfrc*?cvV&Mzc@>?d84-M}dxq!XX$9_cDjbYG86Y#Elz(mlg>^zhYn* z{sIUCbviaC&d=Bwn6z-hp>uUHKw)z`L6JA(M)^r3{Ifyq-S645?#Z+T3yyd>I?bG#?Tps$VNW3`3=+3e7rB*AR4{5;zg|zJ4YBbs zj9_2g54L*jf&NzPGzKG&(G%lb4Jo^V(i)7An4JW*Vs=g9zmoel$1f`{_xq;9!n@HI zo9a?F?dCSB^9@A%Od}td>oqF8Jh(ott){t|C`^kkp`Gaj2^#!vYjlILYVTLD`|AYR zW_XNpXVz+_OpS5Et^DXc^~7@ObWaVXdby9G9tc``blhw!lIgqm1cxgnT-~T75=V05 zX}Hp`c3c{)sEJQ`cQ}50J}*8KHGCTN;`5~6KW*=Q#Wjmwx5z5b5ZF4t*)-vR7bJ$p z)#0RsBhov5n&k>|*He}_)hWuv%tVb`Y_xVyVm9=!ev>5Z968IL#ygTo|U54SKV4rp0w~?Q+ zl)0JpME_Sj!7Id0!2W`}02zZ3H+1(ME8L>fs3dUTXafv~C>w5I{<-h7DrZIqvz38` z3Y_1-9pzJMv<|i!mFFg0^iaLO9PoQd1Dzm-piww~h|=-GO8cU6~wwNS~{nPWDVl z->)LL_Tk@$WzTQHMP*!1kK7Jm@0%XC+D*Xx4H4;Xj=f=--F5%IuQuHXv)Cj5w+ ztr|yqYBnyO89QjU7KBnTPy!ceMqut_tOh?&URp!R!L=Jn^_xW$(~bGpnqZdFqem?hX5I6%fK&nHDJ(ecku;-R$yBjRv?gBMM zxmNz>d9KkO;S^i`4@(VW#R75j>h?X$_1-X7@%TRPA#=_fHkTq(1U+(o=LPbo8tIXc z$Vg3sE7NZUi3evqSK(RHp62H0|?KOLqG=exu(e`Bnt-PW3m!(m(HJK;K9u6du9wDJOMEF7c@ z6bIrS*kgICbI`l~!*P$htHTd6^RM}TGOY101-FRY$G*--VPn~!SO8SUWT}jk+=LVhHL%avZSOp^OA3` zkiXGG>t;XexdtCbj}pv(o^?DVCfh8V#qK=fGT@?~G4OhXR*`Gx`%fkC_eHHBM=#OU z^Lr+p+&@=g<@%BPyZF2J9xeR zS^s*5A3G+WLF(Af>0r~18J<3fhfNwf?ERT477b0Z9D$0#6sIvRD}10@Nsz+uh%t>PlQaT12>q+-=6vqutRXD52dw!cG_gz z^?iHlzJb~0J^Rq5U^#(bH`HLA*m5>F$yUbUB~bL>(kmXsf~N2kh5Fzj+?Bd7<#Jo$ z8;a8UFzoU>obAM>&EVltk!}Q>luzIKxcP1-I^f}oggHv2Kg^+PcBRqv1;)^STH8wp zU&QvPwQ!#62KUTt9c@w?g1V1H2{tCP8kZ;}?Y~uJ5jxxma`54d%HkTM9FVbL6!b60 z-Qvq_C(!5)M?sy(jaR$WKL|`fAY4^k^tkH2(fL?!;$O>3&azjeC88fm9y8H)7;^&v zjm3o&ln<62k(Kjk7sHSQMvsbPYPh64fcOX87jaKY34K!wGnQ;oXaF__7yCG4`0L<7 zZs&WRP$N^rjSX2D@5CVFL!Y)QVur<+Kt;N>heeVmyD|9y`1r!tgh7w%lP}6kV|ep$ zxrsWMZ68#F7u>#<#zpBY+eYf~splCIy4<+Pcr*7YXZ1X3eaNqhBsyA>yBZX{NLRl$ zipTMSiLGWgT|PkX5IXo9iBxX>({S;(s)2Jx-NVNYJEkWah+iK827i)Wvdk zP4*6J;QTgrgS=bbE2_Dyv%r&X-7=Dzy<6WzBfAD^al>TqO1{}*Zw%;YYxib!NI|$RW;Ip;TisS+xi|!pQ8+ZC} zk|$S_vrQM*$^1JRnN2P2h<1ZVeDHKC~;dt|Sln7PJRv%7YW{bss8tLZ7-fdK%vt0wLH(mN+oe(J2v4J?8zA&g5gpm||dD78%MqIT^kU0ODAK5QM8NBAhl%E2oVr z_uU)AgJ=~&jU7(YA1P<+Keam5y=qvCrdULkFFA9xv`deDr4nnqhkS-3cIXa1DyK&} z_GEb!%Znv%s1#*s;mD;yk;Wrx>fi3Onvy1{`50HJr!-2)8#5=v4sLowXYJzN(SDIJ z?3gw#4ILAFjes8ir`r4b;fA zKabV1?-#>t-<+#UGAC&2x0Rh!gURfWfE%;n;4AzCD#Xexda`*wEv$E`3vaH#{+mXE`xlLb9NK}DJD?4&$HvQ_`GPuO zs_`YLds#DSggP+?_9{f3<7lV;UF_NBd2m4QqQm~LNh*c0SNw~{bLe*36*D&HXY2Re z>##oE9lv4QA+15AfYc`uRWg_|9>=neLfdQmem})wnTHc8NJc 
zc*6tybw@#Kt$`^+G23EKVBE>>g0ZM9KKtTE=dS6U8!uy*%e~3o?yqS`lY`UU<11XJ zvumoKW}W(H12S8l)#vB-R zTCY@Eg^16%FH#NPlBPL}T}g2*^|A6;W36~EmTTz3J1Gd<-0V~VxMy8E zR{R7aHuGB(HGIt)=KA*DQnJGZ<@#onk!w{w0aP!oC2~)bFk`%E1hZ?ifpYy}Y@laH z_9GsRDV(uV>@s^DH{-dhgLacoy!5xX)f|*a4#YZ<@B^NFV@B|Jb|f1Nz#@VxcgM-F-9r?)Ro&|*iYiMO7f5SGc&8Hi=#Yc zZ)WSaDYTusL*Mi8nBQEPe{sKs7Ww3XkIB1^ z+Dw7(o!mGo0zu680|{xE_&IJqEoN_G3Uylr#og@`{O|WzhznK4rL^~QgrG$o%-24y zGQ($k`%U;Dcv0yJ?dH!Wr{p!yYcOMO2Z>mCuewyg zo>&;Xia77eQlcF977F0#ne`bNc-4HbmsrdNYJu7AZkEnRztV)}p z&Ddv`4mpfmLpCuGC*K6?;|42HV?uneKU#Q}VL)o_x@f{E=6q5nhITSe8;McsmhI|PD-5F|JR4ek)!o!}7MgS!WJ2=49> z+&#FvySuw}rSJdUzW1RSXYgH~DV}}9*h#nY&Mbd3^5r*3D zL`5nlDfKrD6Inl?%im$|815mOb<5!b-c1A1+F^1~K&&tvKg52dp!C7CJ~PSbS|c1% zQmO!rs3%D9;UXtj+;G4&ecLWIgPjXw?XEza$A#)RYXpU9;mYuyxy z5O#E6f+{?hPL-nLQ|fL@rK^G&m%m~-Q!F!S9{A>-+u{8#4x24M!$yy+LX5PBq3U}- zO}@lYg(W!x3gF|G9WeZ{$5P~NS~20)3Z1dVO*WCw5~9s~>J$+mVn~Zf z*&RC&A4NrMO)S>ssJlvIGOxT%4>u2GE?%%W({;Yqj_R`B&o#AE+_Hw|zD=C4T+fAz zHXj1K)8D(Ip8uFr_r=CpqQvSJm{a5Z|9tPRKHY}sZDD@EGJy7ElphYcsEjLuPrqR?kgN^di1h{c?R>4WT z>rpL&CbfFDa70>@jUPTD)KE_daBVkJv=JyRSiRlI?H9LCmUJ4mFZ~WMVANguS-moW zzNJc@PdgR`%$RQM^Y*$3R12oq-!!*VDyh%DJ{h6eVj9Bar|A+LeT!3G`@Splbpz-y z5+JXi_CPc?k^Dsid~stb&Ju1r&~6^<5WtMWI*vsdg2= zETZ6kwF{*^39KxrzQE=Z@>g@M;Jq_Z(@%Bk8dK3HOi<;p7 z)xs6~HwEB#2@juLxk%b7_>Y$n)c5H5QR^5AmN=qRi0ZG*3eW~cvnHgiawT;+ck=^) z$>D;^ik4_f*sA?KH3_e;pHLGJ$L0EeCE??#ve+alQm|c4{G3VnZAwE=vry|XLw|Hz zrp3edun;?Ig$g#Dy}G>)(|177+3)rK|96%EzT>wohc~Ju5|o-Hd~$A2K2%hp5;B0* zaW6J#O^XyE2IIX+qPuH9Z#*9aS45E#FjgaNSg$JUrI`-^LCx_Bd|Gi{^n{rbZloP0 zP~&0OTR3AGu@LKxzUQNC0j}4Rh!R*(g|=rqP)d5O&&syqt1HjBs=pYqls=m43#Kdl zmSujT3PU%WUnB&1k&=@7>SM>ZSyDurbQmH>zxmX@9 zQL0^MB*>RR64-^PZ4-AB8iL{LC|cnBv$fexRg=r@jBslJ;_{96!<7tK z7~EL50n)wvgKmjEPz?655b&WnRg=kUUqS(+IscbbQ#GHjw{Sca;V=PkDW+djG|vji z!WKB_eG8yxg7F(T>t!PykD(?6O(0+uR&%}(087HKL|eiI`4`Ff9a*+*NvBqnU=MrN z|Ca>vy1ddv*bUzuF8lUv+tv5jUB=RWG}aMR+9*hX6s`v_!Sf|!C@;rG$qiA}j_;J> zXuBEjp$51?zElrGbydEXuLsHi=j4;9vDVJT;pgi6IE`#?nkm=^4cOB`XaKM|uKb4M zt5gc%kuUcd&6=<8$ee>Avn-8-d-xq**a|~+$>Hjl8G(T)<@s!$~CJ! zy);zsX04F^E1>8=VTg2j56WTX8gHv8sVHHQ6=2PH*ZEhnLI8=`EiH%?b{3Xl*SO5& z(iP!;{se^(B=Yiw`y`!84~^n#wQolKpWjdVlWGu7?FxSh{#$)vcxKC-e#c$K`bkX1 z7%P5AAfY7ZB0?z!C0vaN2WdN*SI{pU#<2IXb?MbLhKl8gy=875E#ErTgRuW+sX4gVT#eKa z9gPV{8ZlTONOS&B*1lo&8mL5lvD9K&;{Dg$R~jH~kWeq{;-^9%@E(|+$<(bB4B5iu z`DiVp&y{i_rRMMffg~3a0lqko-TRM7@YHuH@~jQ=cGU8tNLT;qg)Z5gvt}n z8uQ2Ln$kpE^p!rZRidv@w~{>KH1uMq;t^g-pC|4w-C1TD6rJdD{|Xz;glkYMM`0+! 
zg3=1}hl_@}d!rd4L{4hFM+Gx04dQdMuq}!59`!gdJ_q*udcsuUjc$1t_xR-K2rP}Q z>XD8oHU8oP;xUgbitndv4Www$?0}B4#rb6PmOxpvoWHolA&m)n)4FAN5fB*B8Mtv; zPu5Mx6I+<*wPp24!Q%$V(`7zhZz?7ONF_b|Dl1W*M{b#w5K7KEO?=i)y;FkDuRcH=ocq$32cYj0zd2l+c5i!z1pPoWgFfff`h zN>zJ{?61N=VzXMak$@L-)lqS2aSNM=Q%|Tqndi}}a41+IHPeakl7b3tZdkzYg#v|u zcVc)r1UP<-dmngft=48Fs*w-@3oF7NVVp~m#?eN>$lIjH)A|&N&ter&)L5(Zxyg28 zzERPu-cj~{H6y^X9E5HrIj|kKQ}vi3`u`0TAwwA|Ynw673`{rqK>+V0N!=(XV6|v)s3p~i2SfryQyM(@0UO!NBsfl= z8pT-Is;+FW(yzn!?y58GEjwl&$$kdswzOSRW+_nu{g{XVD!-h4>MrUB*71SK7?pNn zv(l(p`@Wrw&2%tNTT5gjamA^+B zzzpx|+U#h~>TW-InUeJy{W2s=H5wd>#*;FUds+*LiTiIT?R?R=6QaLXqi}${Jz{a# z`V+}D5?Ce)C19%DeHHUp8B00wW8et12|PDAm$4TL zk!W3eiI+eYog!LK%pdJ&Kf3{UsXF5p7n+Rhh6NQz(W#K?(QAHmOadCNtApdRklE7b56 zB+~WCC5M@&5}%+*5b9o%iw!t#gwKx-j>i+)W()Vpx2_^PfMLJ2ZPGqqckC=Z;8{Mt{un?T zEt|vI5-1i2Wc~(+`;_znB2}m;Q)b(jaaX^kKsvPfuSTZp4+lwVpcc}Yuay=Gm5KU_ zy3(LEt|TH%p^t>oEGiFJvM_ue`~cO&nRqna?u}qe(7J=6@G%`*Ha56tWh_`YCecDEI9- zlb*G4;`9@Hv9SJu_c0(=1`hDA!NVI900n`KiS>F&D3b3q*f$OC8)IjaZ7K>3H4OA}DA$(h7BmCZM=J^~8 z^5}tVUeigTPG{e*n2D1`@}@o{r=k5fEw)`9NTX~9;9DcJ>QQT=@DtC7%3wWb??sa# zYgyuDMa_y$5p|Z%8%Uyjv0*!@)LlT`Js73y$ke zyC(^ldp6DGAZ(==h)crhD~mkX+SoPSneWL{y^C0FQ?2*-IO1kkoK|{4-j1dT!SD4p z)ABw~;bK-nijzYabpSXA!%3dv>_Txf+2r=n!Q{Gw4j<2IM7XE`WXmyRMsEj~oVJFG{k zHeCxP8j_L`-~G_iE5gJ%uXb4C!{Bh()s}KQ>A|wLC!FQpC*5&A`A3JoSBY7cLN%ey z%i4qq)HfNqp8@fIjiUGO zWL#NmY}1e?46U`p|LbL4!bA`50Q~xg5JAZ@#(Z2Z)Oy#K5d>e^tf?PtwAkA(rXwfK zj`O9jmT`ZvZ?0c#;|vv#n;;__AaWqAOY#^JUQr&O^DWI;@^G)d)lzUWBByYgPeem4 z<$OM!l|e@=akxRj!+wWQV!%WN^zS(Ajh2*vM+6%csM>9AF#E=x2ZwtuWIrYG12K-;sgsX-Ga_Q=mMnh;$g z;ewTzLKg}{bfW6|HBshK0TJql9Yid(nokxe--@-(1{wG6c$M*~bNIic%D601F z$mIrx3feL^NK2PgO?Zm^h}I(5t6|8H`Q1&ze5j|hJyf61U6AB?P}m+E#Cqb~Vc3^& z<@x7-G(1CnzVL^HNyIx+^wxQ5F{AbP<{|iaPh0%lhN_nQDIw~GopELSWsyG3wc(Sr zbM^uOsrSZ=xpyO_^GmSY^>pW-QlpAPCpY;h%x4T< z*jY^)>fBaCbQ4cKEE&dv5GFf<*3A%Cj0%My{Q&ag02R}eN|p-f)lFQY{$6r&rFHON z74C@dP*3id^Gxm?K2)*QON;$wMMbg~LnB?7)M4gDi98fIqDX(NtBqU8Xw|&4S9f!B zHe+;FnTj1!wWXBM$#X0wZ?0m&r2_T|!25D;GaQG@^Hb6z!;Hk1bqv`pns}af-_2z)f<#}+c0IlN8Se#@>XQ}OC ztr&+OU;>$gGZRBe#jCUa&-ff0K9{NE`OTHJdat*+i2(u5)A>sJXS!XV)*Pf@A*OZQ z54g#sWW6=FBL59fU2f=I9#V5Ku=g(B9FWJiLHI>Y2OXhxvWWAb8)(_t=_glZD~vxD zl4Y172 zyJYH3&C~B5ADf84BhmQN^27s!NNScR#MaHGJE^J9`vAp>m^EMTfP z=Kn>tJ-A6aBkhNN$Lsx#Ak|}707WOidSbkFAEzeE`@PmOjFKLU+B^SB<*ww196S+T z#r3=S@DGRirrvSS_l~KFM?6-k;<4^|A%@o3OBRg2ox1P3s)|+Hy>05tW1XavTNlG~ zE(%GgBr~|bz|9eOcqE_b)P*~qaKcOV+4Y>$P3M|_>=IgT3PqUuEFU%;veWZs|Ey3rS~lfDr*voZ>!9S_nudp?7n+2m^y|qtF2Cpf`W^h~9Z~>-IKqfB|ow2j*t)0t| zieHE4U{@_f`xjip$>$r1iCNQq2n_jjmG4;_h%>l;vn_W;FTKvJUW#5?fx1L)m;3TC zLvHi^x6)4;7G@P|+i8J76GSMinLyI|%)yVzb#e1CTJ5`(qdhr~2Rjp}{}N#M%SMtQ z`da(Xm;Qp^P{B?G9|cluO`(-#*?wX+5nJT)rJ`5-^$iN!88`YTmp7A+I<_w&A>x6(dIcA_Wm7KVqxH zsrHhZ9S*%+P_Q)SI}4AgI!A(2WgORxpsuwNcxDKtZ?ZJsK0$Vj{}KoEj;EJY5WV9P z@_DhK0H@o#A$N(bZd*iW+pg(Vx8E@7ZZyO>JE`dbDVi_L6~2cj&Dqutx<&8$$UY=- zBvAt${CI6gb$fi@L_ghk9zITeNe`LnU7ZqjwcLOBb+@$5*8Te#PIS}F>WF)XXy8&k zYrcQg_%{^Gp7!{Stis&0LmA}4OCq~Z#@q7^WKLisQ@Lox>&o?mZKnD4&$K~vu1M4u zA?pqJMrbud(g*-zR_v8 zOfBmdn&UJo)Q2IbC8Oi#704L7w}G}&U#@~NmDW34@B2UY53=88k4egTDSN*x-4f2m zE4Hp^;tz1LO@DTSOCjG?lzRz>xia9aR?`bt(nyTJfX)L%G zWppi?FH*cR7W%?K8pD2JWkbWyzU3pe%(@uRE1uUGJ}QAKR_s_y%&fGPw}-=I>!wPn zo1ra8KP(>~E^^=k-&P-Y$!H#SFe>{OW^#V3f|D=Xi*1sQMj`5D<=Y7e#nhThZoCaR z^NSjhM6@J{ns!J?5fQ9urM-rq1;mYH{GkDOqKO-+@CFfA^)H-pLX<+jA`)0eCkKWj6P=`ad8YN*^k*`(m=OHRZ$ld7PNh0BbQOd2F1Ae9lQ!wl@Vf8T zc%OR`aN2C;*dw6sFs*>B^{TSQ7f5 z-dBGI?AKPSDPAgwX;iM+;R8@F7i@vFOkV#9%Jds^R(806Z!9}I4281jAGyWH_Aoj) zEWePLGn|$+hcbl;|Fd`FJ9HU}jB$D2Q<*4Be#gIR;P+23r 
zBFIj+m0-jZhPZr_n|X+|stBf-Ws=I#F}@2mj4Z5{`T-jl@-D6dp5otr#FLg`W9&9* zqGBo1{C$pjPTd@=f3z1~MMaPe;l(T7<10I3<1D`1zJD=4<(?ybL;jrLua9$W4)3-g z``SkUDDLpBj_=6?h^W?78FUK3p&dkA-6ivS3&KiVHl3uP4JCi+5NmR2bv~XZSo@=Y zTKhdq?LDU2?n`Ud_|Qb}SaS~Dr(Bkn+(2U}t3a|EeD!Yrq_&go50`&X)1lTD%3hrhBSk89eyo=V}2MMj0~5yH9Om1YY#>J5Ua8)3j1mP zgLBJS?tXV5<(1s&OuH#&FlH&%^XMsr8h_RV2o_?nHmk$zv({v@IQJU(AvQqd z6iuoy<#v=Fv5Ng}G4QoAX^)z~n78AB1Fd1k zk|nP~M(>R=aEi6#d1+t`Wm4sLuIAdMM#uYwr$~sGXTNp}W=%%@;q^uyMgb#<{zpS$ zl1REb&o`|wpVH5_>NP8J^~6Z-wFim0L^O_TO|4!=MJ&z+W-!>&)GWa}mFbO!Imad`yDQROtVs+CG1r z8pimp2aaqbL*NgPkn_~fM7RQ^aaPhC?}Q*BD*^WOf!LF<+NM368zC(IQl~qYl$@%PWup$mYE+`kCz;=nt z@N@eb-LsC9h`#mW>h7bT8B)@_4u*$~!J-SJqUJz%FY@-z`GY_eE@=1RC(owVugpWhBNxEC%1#3e?Z+R#N7b9sTkKy)&Ym z828YkxaMl(Kjx>Xh}a_~`4x@tX?e!$w6(k#iNBr}4aHeKB6@)H@dUP!6IbxCF6Zbj zPM?q*kOrmVh|>FTHBvhezBqPtJa^qEVX-Fm!GIzXl%jY9ycPf zju{7zJiS;zYt3|eI9t;R7|o;JI_B^8X%7_2>2(D?=}inS0@usw&-XdOF!pugT*h}1 zuHQ#rTBocEl=Xp3)<37bE79M8rKzPp;dECr)`Dbc7g;~LOT5E(v-zGYaqe0#&)ra? z+~u>^xdlx}-fOoHwnI#xPYps*v_KjRvpmf;c3R)NEYSgq5^|yQw%N zETlii9M{RKe!Qx* z#@~`M=fhJNnFu0WF&TWcztf`sejFAeTp#qd;|%2QpBy6 z1VLs8c?TihMrc(!!K~E!w&r{n5T0Ja>d}_x6YlXvL87MqT<9GnW}PKYk(D=kCrb2L z0kSHzd3EgqKQ`%ZwdL+o5X>G&eT9t|XbBfo+CvERxn&j%nk~3(Qzj-o1ia`m5?64v z+Id=rjmJ3eA{UMk)9uB_-bU%{9m{W5IzM(U6TV=Tsyzp;VLFsSguI+I z+YQRfrfh1QTeqTXUOjdq=#67x)mW%lEV2>O$vAMnyVmlq3|d_rby_I2ZBP5^SC#T) zgf4wga8vCi_rJ(9{}+cTC{sfsb{oru_7zUHD!Fa?a5erXoq(^Dc_5!-#8-o#YO*VP zZrlpJle>cYw^ zFYEs2cby6%d{7;EcIj9hqa7TY@1NUHDl~} zcyXJJnyy>c6XhkdfgK~Ugg>{Uf84CJCrTVHwi=CFp?ZSjgjAUwP?U<(P)hqY>bqA-?1JBoZJKGq;5o?Cx7?Hr*b`;d>3d30RESC3AnBiZE;3c*fd> zp4xdd_#|vu`LvE}dMfAo2Ou(JenS9bg=10$x0e%v>*kx*oe9Qh zg!e;`0K(Xh*Tt#^uQ|H1Z%_&hr4Zn5dUW$}Idsq)zG!^k{C1`(6xx^gnN$S0OM2^} zV5KQ&Tuy{-fom?s`Bk@#;~XQGT1z|LtpMF2>2^Oig3q~|_0s%eVZP2+q-hkiXBrud zNP2FUPSE~IX{id^q@HDx24(HGX(1;gWw%*JIuP<6D1=OfOMKG0Z~#yNO9%sC9v}p; zq5jrh6aBw5X#f3s8KP?Cu|0U1cUbAINZhMmpKngRZU2;NK0stAc=ytFByU!x3}2|r z=ir#z?`-|alG|;s253eyHEYE#!^@P)*J8hExYxl3cP7Zo<=KXFw$euGffX(0d~z9V zo*)~PL8o1pS6X^WS7=2Sv6j$&VD~uHcUQ~0I-V;?|0B`X1~b1=+a3XZ?qUcD z;BBqlEkuC(=jCaUiS$@-9C5-cHh@yt5eG#WHHU02_FIX3zUWI8q zhV^)Rq~9g&^{_a0HCp!ZgF$38QY7kE`9kE1y<{}CO)GN&S!dhr*H)tBk=`uw+8c?w zx$Pdi8K&|T_R5L9OF#Ff;#6m$Z?DO09={}3rzm*S$9S=iQ0J@cH-3}vetlnDOhXmZ zu`BlZ4fLguFg%|=RdF9kYR~*t%=ud7@~gZOyDY0UoNQ9=cTTk7!vnzYYtc7amidKh z*TDmQgjreI;TwyCKCRz?elPNGtiM751wS7D2`Is5>EJ*tKZo^a?vF#u42k0J75swj z=_UW^OuG`TGxTr&xWUK7;=afGt!aFHCA+_*BXb5}53r-IL#;#o`9Orc&H(`5p}s&; z!X?73Ybbku07~*iz5nxf|2**lI`qcg9-HN2O;uG@{3+6-OVM8!OTLMnW80JsuJRl5 z-|$Sd>)ASb-)bohPW3YaC1J-b%$v`KO~}Zs7A^0}h(|CL@2`%mro6hQ8waVAv9R(= zr~kGuv;FN8uC*_C_qfDhU;?crzV?f9OiY>*ebDUapw@Ct8}o*vY_l12j8s@n@|#bg z*a+>*k{8i=+)^QWZPy ze7%T(=XHLopo?D-64!67w=0&=?1|-XB$>x2e)C0KZr6+mjVYHSbn-ucd|u0zR^@3( zfdbT7PnhpZ*V&fUPx)#5vs->-Qkn0cZ|HZ=JRJ67e-bwB+Zqyhhm-vI56joH{fsZd z!V1y?Nu*auG;OF+U+C$eY+6IEm#_RK{z;(wv4kQ%~&<_>PN7Cl?_c+r0`B^{?d zfy0&Ga*x+Wl*)o^3EnQ9uabLu`uTH*HLtB&1|~>Rj_(l@L|&w^Gz_%eg#v}2^XuYK5&Rz>xFG>C zJZUwZU>Q*2a3R6>lF0@uiEcEdNvXC2!Rxs8P`U10^}!t!LcAu-KV&njCj`jDEK!+= zo$A$>VRUQkMc+NDJ^NZCM&C8auL9l)5-N`~cg42Vjt}rIMM&H8i-mxSr-K_3N$I zV7=3M+~_pwO7;69@KO8b=I+inx=Gdc($+wA4Uo3IJU<`QWBet!(Hs7;x~fW0Q1Hi( zAAe!)-Cv&{t|0hTRh?#vmC4D;A$|BMF;5q&DY&`ufqnZ&P(B^t|Mu;hj0_zuEecR) zu@Dm-`qmg?4avAllKvLFGamI_5_hme2E<5+k^) zs|%HYd%h-OVQDEe5*1i!asXE$Ehi@@K7JNlIfBLkuy$$~D;PjLZVyc_K&j$aQGC7|empXG_6O`?qfJdBEkE zoKz4M6$KJG9g5S^K81_uGvH=rWq}Il7+wbWyzcW$OI*y%m;gRL{_w~Mo7FPA`Ro@r zx2LeML5(JzX2+A$mF7S+Lhw7QcCLfS^mw|ZqoYGbMXhZkOkq9uB7vkvtl+eqZaSa4 zjtJq$fYG!5@oO>_D?Z%qweqKthO#gj6UBArROcV^T*8<7rW3cWqD7b 
z<7ip$Fd_Guy0#-PHiPzBF!qg#=F~NaS#FlLWl7~Q7wyLle;f7X$3;tXYm)R+PeJ^0 zFTYo_af-Z=gq;KmeKXyLjD(f){Z&}Cf3AI>>|W07$e}~n8F_b~&0}q^(|G>X`0DV% zM-83u{e@=Zv*LSC?#p|KEaH+qjmVj{r;{WBSMBS)#Ngfd6Nf?$H9@thQT3>YFXTnR z^~Ae2FaUFMs?v<%Q!?)R1s~!b;QF#Y*z_-MthR&`FROcWT+jdjyz>mFBIgGL0gex* zkkHVkWO1Ei%$bZPEs0@YvWl8m-}6)cICNmr@7P1zpo%LJ$e3ci|4VM!KcW?96!YGlhuW zK>+6tuEVX2y}u+dh~U?@-7o31o+n9NbX+kcp^96Q8y7kdzU^7k%-55DZfEQh{vU*N z4a!T1{yG38A+({SN*C=A_|ZDBI?y^$J1_tMQoX7DKP2V(gG4l9x!tRspUC-QQ&?O) z81zvILzdC$$IFe?-vUwTb=syUCX%>aY&|{O*fAB3JA9z>^YcMZ!ph33-R|k3rkMue1v$C#a;@eZ(MS`R8($3m*MsLu=jX#1i=q#GZImMqr7)-YdF8Z*a9!mZoS5PaVwL? zX$f9^XE1JHU;r2gul1fnW;l(Djf<<DI5}{`z3b?dFihY?_jUB@ZbXI-x z?_Y!N02Cw~y5ZqrZm(C+PdwL}&zaFKeAZ8hj#g{;d;y=cr$kIkORH)KT*0xiit-*6 zeUzYjQgU*cBqsl{J;fp=W#yUt5n6x2g$5gaK|$YbJa6?irrO!?urN$CcW~(=K{xgK z-01vNs#fiKciL(&fPotLEl;69HshZgb8~acl8f1VzdM??1UmEZk8Ftkf-$7l%MHW? z4i2Cga8Xea7{v_}5pC@L(vs4_-zQi;7Aj4ULp%;u zPJ0Fqr4YapG_}&Con*jm_%ZM8`N(M&lWD+|GyqLWau70zXW(MrmWwt${OpeIP6(tW zBwcwxS5yUH1buHT($P<={>s6xI602(YY#Clq@FldG}a^d?zhclwnBBgQh6Y(3VgyC zTvWj%6)_qbour&Z_}Du~0MR>Y42QW!)yyE0SSE!wC}EW%Vpn7X{yR;g0!yq&I=;$dJtu|Y?dc&{o?$D5s zntyHxKHZ)`==MhuPfSdJMsD65qRsAT#HaS&Pn49Di76@V&KF%hJw2-hVPWnUoBnxuhCG2e3PqQfag`tZ{rxwIWW;|~|7~%(O6T>U4u+rw^-EYJU2j4_ zdptco-Q?E*;1LimE-sR|oUFpBS*jG@LlavGU4odmyPE?)qOFbBUr>)J2@??paONfm z?2fio$vaNG&jgW)PMdpUPL95(rza1}YJ&~vXQa|NYY*sI0U!*Qar1I#h=YT}8uVdr zuP-24JEG=rzdv7JUkA<6(8vgM!dj`1k9!}u1l~!JD^_We(e@7vsHv+5{Q5QBG{4@= zqHAecmXl+*-H&b)V45YDm@v04ePGV!a=9H86m)%UKM)Uk3KW^l+(27f+lYvWkE|B& zDtNNwoDU|+SW|d@uJaFk2fcru>8^hzOX)WJm>!~Siid1VoTvu$rddk-*y2z;@_|9&%bTSVf+yy!M zVW8c!;Q5IPE8`sXjtslxQ1P92vRcg48Y#Jhuh4(90K4+n;jTf@yC=w4gBWYMtPk&~@dk+~ zM~Cv+zAOB-SZe&}r&OEjVYf26WUggB@*$Hx+u_(GpIZ2j8$^8~(YBHy=|?d^6*W}v z5u>?iD$Pmam~!6rNSLX>!Thxb2iuvV7o-kKVegYgghuM#3vEm^OQg zO8m*Bk00>ViGRDJYCzi-huE+0@kNTuV8=*(AOk~X2c5Y|#qawQH};cH#UGu<%&&~a zFO3lC`Pec56bD0U1VG5t^D<4A{&VPIQqS^wqEuFl{G5u{(D8ru>_?nzF%3Cve(0#_ z|I^eD$FuzM^LIf+45Clck3AbdzyQ2fZ7d6h{FIaw_RR3kLT;Nj5PhQ&@){T#qTn&_ zcljgP*w`Q-Ab31pCo$>=GN23K9|*xp%qI4nzq5QU)-|UZ^(KP*Z~dBKv4) zXs#|VLn0#5SuGd;2ET_OA|WC9moEGj$Y5k-jG)X3s;Hn} z;4&G{+!;!+UTIP$#h4Kr2p;Df7$0XKBNLXCG+(GP6c-nlBCl31c^ti#O=2RWq}&G) zKMV{^g-&~U<=pi2HJCu%DDinb)}*Je4#ZMeT3QxOS6qDCD+%(`Y_>m8AQ83p199|P zn}>$S5C|M-zkD&nT&gjRz2`4cDJw3a0&g4!#y>19479q;%*>$#`mtmdNeqg`?>A1R zUT?3G33OU5&NlM$@;9cxrNnNCEx^y({2L4+CF!H!#H=hmTieU&B1Q0GF^P#Ly1H_* z1AqStnVTPhcRHt2CETm6t$n(=X}dd|1UgO?of;5&p}@nx3l?&qWgWnqZ*rhvHm#hI z5EeEZP37o|B&?`xeXW;z;k4h60$rJcf`W&>iAf=-?R9@qtX!gA3+izVVMp(qhcou7p?zWhOy0Wq*(;$o--(+VobPzx`a{ru+#lW-jS95+@ z9Xf>gfc4~$cKCf|w;}^Q8v^2zobu~hLkEYVvA@`Rl|I?|3%5t_?A57!z0>Y?0=0W! 
zRpHA>Nw`v?SSDu+IV1kWrla}|OZ21jL%HPApgtqY4U1Wf)%JCz?-!!e&W-_DGf{rB5lh>_}U+wYi_#$@sF+!>x zTg2O6IiJ^~p**gW!mYDvUJm!#>Kz@-29Lmsu0!;rst_*#(*dvlzm)}NWVEzC0iX#3 z>lN*(@L#vY254&aSQjfq|fHRA|)8#GG7$p_|L;6rVx214IfLd|qu1 zM=Fw%L!hBGY;8W`*9?{p@Q52R zKv=BRWIr)CM^8rwmIHVbhF}1C`U=n;1S|GiFI44~H%Xsy9Q7CH=_f z^{S~a__sl`yt=wNFtG1$%~-f~6C4OPucave(4AndPc zQdV9Tm81kxZS@xw4@4O*>0`ZN`vbD((k82$?rPt?z-Oy=*sb+1mD+^G5UcjL-*4HO!R!!m+-# z_swZX<}NDxQSg~~)E^{5OBnbT2m=xG6k$Yctq9&)xZM=1!a!gi)&;l<_Uor{dh-;e zGPKe63O+!$y5HHUbLstuS%*AknS$-`<3h%Yjy3&+o0*VnMQ(%Up%v9RmX^(WXM zj&~`C?l>N?$sUO^kU#>@5nJypL`*&?_hKQ4a5sq2bY8mHhFKjoLHF*4po{_VO}8`- z7*B~v%xDlZTpKSB5`m_V!=D%RtM6Ha(wv&0wE5ffAJ06JIh+EA{nPw1Ddv=0bUE*x zCv(qnVyk?N#Qve`&xn~n_9~?25xim6D&=mA6s^9ryWELPeY+?#&01Fx{?1ILD&4YM zc~<*|2CCEKAuEIR;xQ082FKO>2v)bt59hThf0;YfxyM*S6 zj(y5yz0x1fKfl5lgrcPqd7b#MWQMX@7NvIGjE)vN2 z!~|7T4-v;Z@WDH5%Yt&4I+k`08De2!@lGKvXJQZR^W`t^oI#@N1H_Z1TH+ECLf#KN ztgK2^mH>7D$X-olB`?SWl9R#iKB48V_m{b-r9}sTmOMPO)6?owQYestf&Jz|M{#j+ zFywMLnH!`$0`8Y)Qcvd;?}LYav725W&Ut!wu_Ay_V`F0zhR;}4U%vvf9VkLpc6PJV z0d?D^h%_jOrl+T;*bnDtXFVTol^m8N9PWIc?^iP4hs4FjflDX08XFz8 zUG4A#A*8IW@;B{Wt|EM6zk&jlu=Sodd7w9J;I|8^w+e$FR~p`OzH3*d$uG>NeP`6S z6$jM7xWydR>!`sN#pN2R=V5H;($ClM=Aq0k`X}yg*9{I~7&a2U+YV|&eAm_( zi#&d~f5+HPm|BRr%`zUJ^N0nF@LrtbZX) zV3uIrpEb@P4tYe+poJNOban6XmSvDD3;9w#0GAAs@05dA0G1U9&0ippWiu4|YuuWXOv`qRZ47 z(NuUU2cnFZN2U=OcuM4)=YF1E4vS~zWNk$G#T`cCPa#Or=kvX4TU?`hUA?i zRb5X?`Nw(gVWB|2v&8pJ>i4(3(#|;&%V)!g5EIYUMwQpxOeFQkO-g2;)mdqXF!w0Y za<^Vl_%_&bSTC%-T~P9q*5I%(<;>s|+;Hm9g@N=Af@OX;b#^I>IPW5VcSA0qUWYQZ z7ffwUv!}u2nkI91S18RW^9@bvg>v`;ha(R5<)CzwM?5KXA92Z~D*K0r>E-3K&qtRjSv)VOUWg}XW@9ypZME-?^P$q#vv)Q@Ss5vHP z9H9DBQ&XS;K}J9T^-2acMY&!pm=>q5s_J>QuXd#?mdWzwjTjAXsDswq$u}V5Mn^}d z0is<(BBHO2i$g=Q=5(PSvOq)U8yR%>ui?e^Fo+EU{r!dQ2{&{Ita5U4XPk&DpaK{N z35PqhtfhxCLPAARv7p^pK|#UP)D(12&Q!n(1n65Dze`yqPE2z00l*g4)Azy3qoa#>T?iT! z-@o^RTnn^OK+ezt0yP{G<|K{EBjej#(3`NeJprKx{99N^ymW1Sy#~0Av=d%#ZkONw z2%t|z%=V$Ey#O#GrI{Lk*oVFl+fAezx_{v1ZH+4{T&QbpqsxB7%|tCElTsTAj~I(W zNs*tkq^>Y3a;~7Fx9=$;>`NW0AjokxbR`+olue*6s;Ko4Nj7CdQ;_lLe7#zOBvtCpR@}I+1-)czp!j_C1HNdY;U-nkw%Q8Q;b+b- z#~nW$+>;8vf9#oF+Ce8~b@_Z^qpy#KJ=r|;n#*3o1*dks0PT5!8~T**KhU^dTyrGo zG4`zy&co$PbcPw5_bK?hWaL;D+-WVpRt*6dnP;jNhE9ERo1Tp-x{za;AG{G%Na6Cl zcuHB7qjoaaB@t;}s&jcfG>{lGO~Xv4$CIWw*>v%z=dp>-v!w!R_*Lx%dr-2s<#7DY zcT|(>4HM=QQ?a(z3PFx`t#9n?bgR3)@5pX@>msuypf3ZPgd^kmJbwXdK*il5-m`tg z+S@!vvPm^w_OYv*@0QOWNFCgj{}jf%__J}A>_m{xNiq#X@AMb2XUC69u_E%yx+wkf z*mIu2T~IwsU{1s^_%FtqIybg_qn)Ww1I37N< zc`^Rdj3Rxd>3Xm7vUlL$MlM`b3+iFtuCfjf+RVHH8pbtcfVCytA8>i!#3tt6WrxhM zXuIQrFya5g65g(K_l7nz`YTx{$FC?jCt5Eq-A&>(y+$A@cc zm0($fhod6~P+)&A(w9-}0v8Mlk?t*f@i$6ZT1lI7U;p(~h+JHt1qB757YXVlo5#B= z0n#YxTv7qoQ&47Hp>KliegU)ys*J<3vfc!VNhv6R+R5A5S>;&!alI!D+=hHI7r+}W zK@s37q)+}rRaLd2sfjC-)0|HtW;60NzkIroAb%K^1K=Q{W}wsEMU>cQp@oHKu3Yo86;O4 z8XA15BO7PP{&vpJ+h?xX5)m<~g_f0#4GkZ^%z?>jUPtq_R3L`xw>^Mv@p{ko?%!;X zS8!V7fN%tk^*+#pj0_FMu2($J&Qppc6BaTb&r!*j2PS*O5%}2HRd#Eg#EH9hJSoP5 z3Azn-qMoluz*}{{+NWB^BOs^;xzK(P8X6kbD<1L@W4#$%@Wo)i73m(73m=tx@FozC z1NMcpe>jYWztepjY;1k-sr7oT=*9E<(ZI`T>-=%$^`#HA%j*xn>)08&D%IE{-E)U` zK3s&QH1+ZK<|&+h71$dTZD&5~Le21BxD6}P53O-ZUi=&LMERDr^|BCQ`(gF`6YiOD z6S?q9@=+Vn6NWjV)C;TW2Ao4Wv(6 zvr7Z|G0LI%WZqLk4;Hm~S?x!rd)2l zW!U4C&D*rl>8@#MATu-Uj+bP}IH?x;*yyq0{N(TaZ}w48UZhzy@nSnB;MKBZ1y;diLDHIZ^XP5Tri`< z?m_v1)f$=7n5cWTBM~~6a;ws(^{Z||NriIYQ;CcBvSei2@Wyv&D6(8O5x=_pu4zU$ z$@-Vhef2M|XPwclSVM=ngk)^V`; z%cG=7^auHbMNa_xTf-%v_RRX(uSWIG7OFNQJ7^vnuODQmHR|magI04deedI#-pUq| zDI7IZ@f!}8V5+LGwyivS{-Tl_y*g6Zza_1qF$3araB#5AN}Fy~n0!9Swm|A7 zQ%6Whm?Gq*B`!_{cqX7hybfIgp%BzMpnJ@~ghNQ!VALPegz0h#0)%AL^sJeo;e}U! 
zO-&70_EQrVCx%i0c=onCQrr>Pq`%>BDe1s}z4{At5!wrz704@y3IH;+;G4{u2X3C#9sHsg(mRkYz0uU@v z`?YvKx(7r&#~Lcg$P~XMwKg{H`BI6BMn9SZ+9YlIa1cc;ttQjaw7g-{M+>jPNrkFw zN>8tUM?15o7X#?Q0N8~sE_Br z3h($wR~!@eGv)hS+H9TdzuU2K7BrCO#f71k7eMK$nPFPH`Gx4vXPhYdSUF<&_Bp4b8=V|U0TVj&`%-sX;%4sU;uXSbi`++V6Al$W{b z55{tm*>G=+3es)K2&RR;wN$$wsj}9;JieOR*>l6Q&_$Z#wPsbGt@?8Wmk5FUI`cWy z{j7O6y<=}DR6E#UjK*2~4pOE_+;Camg@d=AwsO^^DN?h~>vY*WVez13$`~WRExW$z2E)@*yzqYIaVy za8!&;peeB{;fTd^b5a;;@eTjbTT#pFQ1$WqLcNM|ncl2NU>Cj&Lba`7<{-It}s zzp`eg_`6G_X;MwMVKT3f?yB>-lbY_!BhETETBFPbE5S=}^f%eSz61`E@ z?3vWR`LgzH$@8m#?(Wdp|9+5BX61J2QlU(dlZ3-0qL|cS%6_!_1jka*a<9=>H#+q< z?K@-(T7Ldmy^+#egk*Bwmni*Xjj^?u&wB)j-)cHP7XOU^9tDK^QIW-=q&b*v-QE&C>?vS9xA@OQwb53cn8P6z=cQ?}&cm*0s)?Mfs>m1+Z3XuNKpu@S`M z&#SOAD`?ZZ8P%Xj#h|mjbkyScDdD8X5azoo_{0x4_Y3;ko)UTBYL4$}wM}CEWUlYw zUqU)KUsv5`mG*9IFp;QG_R*hkdq4jekvd2jy&L4p#7f=16C%~FaY*a>>O9=?ODyzk z(+~?kp=nshsqHSys90|4W}sbP#gLbYoC($l5AVZtH*#zaLbTIuJ3!{6UEyW5LyY2_rZ z%rkqO^+$hcow)Fp2r=3)~Zh>)QFQUjrCDbIVIg)$IJO0~}7LdUDg?!Z4drK5pW2<7ST35hF&*4r%Q&l-k-(KXA@=w^N64zmH<2nW(_{ z`i5S15EMpy3xk7v^nl8~8j*=UG(8di8qcrkTw+!F5&O5S@%Y~6{SADE#h#fr&Wy~< z?)J9FipvGA+yZT#&OAl&SI)dj%0t_5$f03w=;EBbRcoXgcG4eYQ%CCf)L`~wc6SDt zewe>x?(CK{H4*QrMp46%bKfe{k-B7;G97@>Q~!d~BIfI!*_h^}=sfOn@842`|I{|GgQo zs1-+-0~nhw1>B5WWL)6?8#TNo#KhDz0~n%4Of{@CO}2N!L;xM-V{lawDvF!_ITcK& zr?Xz&3wJvDSW3v>z_-$IgXm`hN32SIl%a10o=)9Y3Z^GHCqK%6!rwVaD7j?#5uTJp zA@T}hr{)o*-QcQ&Men7Sm-7)-1rZ`rTnmd}&u-X0JuJ23n|ot`P^wFhp*-t+R48fL z&YMLpEh^Q`JUOODC6i?@UBso*R)HFrvG~)E0N2RJOZjIX6~mNj(0c!-%g=odA3fm< z=b{+Zc_ZQgKp_@H-@=1rQTgYb{`D&-M8Gp;U{cmep)#y!F2Vgv@kYCEE=A zTbJIi{dVuMj`hB_JEeq*JN@6cyKE!Z2G2e#4DXs|BbNGdEUsy_354i|*CwOT2q{KYG`` zyXFk9I;;QLLa~sKi*uqi5|6zbL|x__f10kKX!URg`}OC$4udW$OeUQhmUgW} zyVvFwVeZ!at!^<^fzYW(3KTag2>RnMA0}%g^z&r6Nj?gQ$jk1PxmNCiZ~9(unVH+} zF6?h9(!vhsw*}5l&!PrGBWw@pVl<*Dcza%viZ~c?N+Lg=vp|Ud`(gg)k-fO1HM7_w z6$17-J!cys^^8jd|4Uhi!#kN7M&24Z#l=Cor80s_C*zCPwP;%&I^V!@)J*Ml;P<7B z$8$IqL~H2CKA2UoiP~cew@Tk!a>=c1pZy_ykK2;PQ~M#qJ_yoL(#R=X?prZAu2!1$)DD=ls%$_A;>2D2r8f8=X4nu+H)8xbpBv!qeW9OT_Yc%x`*4EsV&Q$zcGWsyN1 zxgdL#V)R;7X8>CJ0eb46!7n(dNm{6o_0~7mUf{X2&PYl^4>HZ?lTa!1Gon$+v9DiE z%dj3DQ}?N)(|Jf&8hxDGj+%_$@9{?+dDpI;{s0l-S^x9QHH&#ze1CeKcxMT!56e?z z!N~Rn>KSrU7|0tj6*u|e$SF>Wy8ZQX5_Tr zOGKx2yh&Chpa^UWG2i+huKAy*oS80WBcHJIB*#=HImFOZ>!|;KPhAX|limNG>h3+! 
zPv>f`0`!zAVyi6u_#cVnysTvQqUEs>a$ERmUlVPP#19ML)oWk1z&HE9%NQ&g;5eRL zZ|WExVg|IWKU3I#(i9H%VGS8mO&DjxdKR*Fo`vCnQOZXNfk@F{9M7mIMW6Dvyn^^L zsR;I}e=xfy;?&9CFBCJen%#6DbJI7qr9<`J|-!o8qm_4`@s|8 z33~BC0tl{;RY>OyHb2W{x_TVIJT7pt%^n?O()Lc=Cn-xWOZK5biekt#MzT2&_kfSK*@Bcc(oL*D6XKTD9sj11x z$hr12-_lX_XG6{f<|vbTE1~RM_+?IY-*v0cJvM}c!%5v{c4@jIpsI>4QTuFf!=qfd zZfn~JrehJ|fTN}cdZI;Nwp?6()DmP#FdqehxVZ&A>Io$!l_H~de~^47RKZCfJ2lQ8 z^4~Wu%ny6>g8()*IGoVMHDU28@Qv|=pP}lFfB9UhW+H57pDam*$IC|xqa?)z&z9Pk z!u>P+qKb^&VF@WoBSdC^M)%Rch*U{byZ<-OAwiSy`LWb*l=(B?&u`46bR z5l+UZRhRt{TiyKfxufZA`A>Nzg{i5fcZo>Jk5%T7jOeFB49m*Lq3i==uWZzxT7rjG z3vqbpM1c z3Nl?jP^BXEew~S$vkz=}d=opy{o2M!|m%MuGLT`1z zfJt?s|9`d;19qb6{#ZSRP=%$ixnZ8i1c^`$F8v;a8S-rMqzt-YxQ|fIqLyBNa zONryr_9F%2 z+4S;bPtvz+q?KRmHuR7r`y#6%_CzX?Kl=YRnoLvrX>Z&2R7_SNn3_;B*JRN%6p4!s z0g}JaH-+1>Ie=wKb;&pA#4JCMA$FwI-c;dq{mT0M>(9?|71v2(zN3JZ49v^%(*u1>Ap`pZ#OJP3r$!T!vB`eiSZU&xKs)9W_i;F+P1XF7R$(#Cz@M=~^7X`o&FXPBd-^0t zn>Lid4Wxh?KVm}e2m%1L4Q&WZuUa?5b(}rO?JrX9p z_wwRM3r9ll=M}5rwhy6?z~GW%;&bm&4_tM5*)I}_5$(-N>HCd5#&;*)tsYL|QLLo- z?r0-qvyH;(+*|dr+ToTpzr2T>Mt|62?z3+U_Nls;d$ z{`A$Yr4b0uYLsV$s;4h2*_4dqyrn$vW=&V}5BH@g4_36f^Kz#$%oB8*_V+AkM9hT_ ztanobMh!JrHg}X?3mK(&af5Mcjs|XeVx0#vSa4)`Xb_`U_^&^yToIuAWxRQoSub7% z<+olLwUlHjxu zF|&`!XxjTj6Y~nuphrxj(u1C+mVrHIp0GhnE3qw~AOFRQb%*POdeAc=!U7bO@tFmm zIXwRgjgpF!hUJqD<#HD_eH;5(y%kKq^``!8+PJ zP!u?=U}vVyMJvKXAebD)wZFAl9|CexcG7iVeW`u%M6f&PeTCE(f*`Ma%^=*qNW<m0?4iY+PK#h9FZk6eQS&tLy3lJuck_0 z9igrYpx<~iGYf*vow2=3qU*sXEvWj;*tWf!y*(;A^~F7Yn~^ERx2TCaSy!L3xGDHL zWkti8rBYEvG8eK*qn^|MaHZ+yKo+*3$2Rw+6$(NEy30JWN^ znu?xkH`AXTROFC`9sIDJH0;qg91i;G zw_Gmc#D|r_VVT=Gw{);Rr+iq7gyL9zt8(ZbPuco(dB>YL$y!GwU?23WYKEa*L2YfD z_`1DR)083oj(3uqBZ@wMV##w67)*~o7~Vo4@W|t&0=Ll;RDNZXf4fb_H{*l5k8e*d z&L$8+t?C1T%h)eYwkmhFmXdch!t@;ycCfo0E6&emP?v+c3%x!+X4?CD&X~6$j`0Si zM{@yIkU<>MQWN5{G?e-NtYR@sbH}@#%xmVKi<&_` zrLL8AQN-KR6J(9C;E~!mGhQ9!jrY$y<@ukvO;nZ^3C~k9<6h9E2sW~m8HLC$6?tpG zrxy$v_Ajq`mV1`VbQ`Z>)mt_Hxb%h%HvJsr|-;XKM_f7nrQrMrC@ zye}Q}x60lBQaaf1(m>uan~pe;wjfp^y+2KWwj`vSjb0Lq+;GLzjvzED^=Sc7F^#8f z558*NaK7T(vOKNF(}l;t?aN+-zbi+4sztu~KEIyJjYlK&PglPu>i19hWzk%%dK8Mx z>>!F?GRWPW)RpFs`tTR+E#%jGJe|2@2c#1&+Xg=Eaut_}^~YTNY3_AQh@Dw#|6&>g z%4mAe-9~=uk+h~hf%q#tT494x0w1D)0B)PqtMQ$eUE4vW_m7K8nXiS;@+|G&xbREL z+9)#LOrIIZo4PDtOf|iY8PY$y>myC&HMDAna-}NQezNfUV12#)+;xD zJzSdT5~mBp`#b8-N!jRmKLgF1OV~Ear%afs;kqNTlXdl3j5wJHP6B5fw!EBjM2)Kx z3cs6@Fs-oz`l8fuloSm#c#!^7r}jtiuXA|#Vi-&P(a8>4Pruwqj&XhXy3Tg=dMMmTq_}i?PirJyQ#DT5 z=;kMNnEU1aeUD^$LIOTqJl;+kAS3&djlJA=Yn>k)iUd<$ zy|R?4u2oN2DOGHGKDm?%kd)ITxRMFjLbcxRH&lxSBKfUa`gf8ryRYHid(b-NV=Gu{ z2`*5zKRB;$OPb2Qnm9CDQfo3m*!{LFTK7cfK7>_Ad&2fkW<_7+>RN^DsQ_XXf|c$z zn9*jioOWEN?DTi;&mXyylkX!u&`a6AGaOqlJEKqBB)ZO>#|kkc3+@qprym-xY-8DTnMwd5E=B7xL{g|plF6)^28_uJ`Lml!(=Fak<*&nRs;xSdZ*3YiM z*Vh@ZproBbXiLcN^yP*T+?Y~trrh8PZ?uX>VYV1O>UtDjP5UoQ!|0aa73hH=Q@T|E$%qHtSP_w)0-xiB>GTfn1j`_aTD<@XJq3KiZ+KVq z1QDLij!tLW`kpygL^ctm+=@Jdr77|*k7yJ=ngB+VT%6BXo4$xe6mV^|?lGl>&DdWra7zMQeQH2OrNXY@-Dtw=8 zh@?^1nbuFKgV9H7Nh5=_4G}n(pQWDI7DM)o+RN?upM(Y(MyGUFF;;%lv2>)eAluZs zPcR;Pq3vY+XSu!}2FmBIuIy|N;KvGnV~4;lAxtLturrgcUy<^vuph;+SMKWk7`9-_ zA}DJs>_8IhfGxq59>w==9ujdEs7Js6@{eJ)E4vOk%{j**}BD12lzS6<&a(8;7Jl2S&yn-pC zPO#_*5z(pkhcZ@<>Ze*pB7OyCg)y@tCfSwE-`wq$_rEl_+HclILJ}93dfNV?l12$W z_4m)Lc$Uf(Co}P!Y2;)(N6B#H1F;0-N00TBSPMHjb4+j$&ab;Thtii>k@uX z%!yKI;NiovUANx5WYY0IW#oP{iBC?Hba9D#anjLPZ4yyE(~~ZcMm;Y4Z@QF*KoHl( zXrFou-`-XTqTt2n?&cE#Zx$+z&2DSXpcJ3Yji6B>cS(omHhv2`{;P4_jr-OLnMF}G zN!f4Uf2RlQ%iAL0QPx#OytN~Cf3Lt}sKtX|^a(@cMF}Dq5B#q&4C>!}gf`QE*luvt zyLbq(PC6LXYpjmjPpL1fnid9s;|Krd`2=SU!TlxOEW+ps?<1lBVWU8%ejlv)R%MO8 
zBA{=Lexj2rYKGWW3}2N)WgmmNE4G4=>o=DxJSiK3sSoBc3mI|9n@>L@P*`B5lc=(t zg6(Iqs^?pE2dlrnZGmuB5UH}Dq?D|FMooq$H;;^`|`5OdVX~`IkcaBV;H^Is36w!*UxyRE+YKN+TwST zp62wTrioiwxTJ{J?Poe#6)#Q z#%jwH(ERG^==dHU9s-WtFc2{VNdL>jqvXR8it6n)1M;+ zD%K~sOcl*KecEqVpGm#XmZE$doxe`0YJeg!4tGntEG9YjYFh@2LFeJm5>zd)Yy zko}E<;os(RL1!r3sQ1-|eCaX!@l@-A9WJCU@kjW3{ckoO_deB&{mYC%4AT9hO}=U~c+n)H#L zg11%Kd2ih1ddv^Haxma=;;lJ9ViG}F6lo;ckI!#1iI;x;QC-SuGia^P{iLMi z>|!6k)%j((5oTa2S8dhg!X{=@ZM;MJ_49q3u0(In(u2Rxt=C0)CCbW;%{(GOJ>v-~BFUoy*InfYCxvpF^||nI zh}vYf`oGpYJBd-E)#zp*1hv?gs$2MK8brmjLChZ1i<4rvXDh;PMm~{x-61Id_L=|u zI1xX+_>4XvYwN|1M2>zOf+Qz%C(8E#-@!kIpx%kCvgiC09nB!#Tq>M4=^BrQEiA2O zv8qvk27#>j{s9)U0`~&JmW#m&aRb58#V$}$h1{H=8nWAeiow&dfx%u>XWGJe_rkYu zIPMAK|@H4HW9~%#*^U-qI)?5Rb)f%TO zhVRy?T{dJ_g$2%+n*yI&)Q1Pfr_1Ry*__jd2Xmt&*=W$|89{{tMM5;qd~LXN?*4#+ zxT$kQBH1~3*{<743Vzn#ZMR325LHPo72&CZaM1ug+@F|-dAbub_@~pt?$=#-;h)O4 zk$$Ci0h+#tFf2mdZ(9E1Ge*svgKZzlHgKc@BQKc|-D1*@Yp7S95EE?}HX}szWs8M) zd3co^%7{lND{{3vb>;e^t^J3Dom=i3Y|^0TPAof;?&Kad?Fst9Bz~~>t5GhZgssXH za~o|f*L&*x=i!fKQcBU$(w<#h@PA$Z@HjCFL`^SVz48PCV8AOQP6PyBKX_dNsn@8`SZBtO=n&8?6?X*>OwajLsskFspIX55RJcK=%DwpO8 zK?cxF%H#6O0y@uLU*7h1y-v#Oxt^<+;dL!Fg^)#*rIRd475mKOarnc*-{#$$~K zP0xM**@hdEPQ=Sowe8fY1?b+1-1YIzX59y2U>x$gKowJ6t)6U&{=>lBy#{STaTH~) zOtpK2=dGHx80N&h|^I*sjK& z{H3^^u+As)bF|_wK4B6)uQG2OAm&aOJv3bp#ye3A2!)wMI`ceTaU+cFx&py^rn-Ld z=@sf}<<;)m%y(Jz4Ao+gO6+S@`n)b{eM&eh3b&m;5ww04DQ%2~$CnC|G3|1b+CH?O z8%O2Xm#$g&rFy=>GN%3cge+f;=Jit7U1YQOc@{y~vKvED1?K`D*r)=-GP7E8vYY?2 z*kM!1Al5Cf+bXVxJyF9x*X(V@u3|RJF^7oodLjnM%4B^;4QEF~d0Z%|g|(|M+qzJF zpO41DD$U~4oj^k9gHnifTN`5g`!%Tz?($69SzRnEPXj7?9=R4HPhGzFE`qroqC^K9 zjlV($yN<_N-NK^PW#Ai?OZHP(j;E~$=IV9@F|wZs@8n>bB_2f)G>12_Y=-QY<}!S1 zF}l7TsKjVjL;bwvtf2_5uqmrNL!4U!kH{&q-0uhl;kizARTu@Vu3d;cF_VeQDzcR> z*W65{W+z@y!jq7NVmTxsyl@J;s2eVd;)>M3bS_KPt@YGjx^Gpoc_h|ZR zfM)!%kLVAp$3mqEwYC5>`IS;w-;^QCE0YfLv94GXCq}4+Vs^>02ucaN#~j%F=XO}| z%7VB>g4@?zT<4pJ>RO-+4*$1=?O5k#XMvo@7|7z7+itRoi!HsFc9N2kG^*d?Y**gS64sl>+1vWA>a`NEC4cqBml_oVkkj+0OX3q=)u4o z1*q?&Vu0iThe#9J4>Uey4inA1`Zvy3_Z* z1r|-=MC^#Oz>i5Rvtccm1jsCefjeSOP7ZKpnqOF$fwnLE!oWQ|KH{N@J3C(k>7bG3 zF<=J+zX9dKNi&XrG}t}I>+5SE>8`1+re|i>C?xjOhMu1jbC^X_N}&d=j|x8nnR9+v z*dXvt0=9BMv(?qr1(>y9ye1&$K0V#r8d_KzngfwkzIVl$CAIVZE4DTO?yCT3E-~X3x`VzahEsx=)zBy8c#rK3rd)CnisH zdU~_hB*%1jp(+=j25sH>i00d;7qiU@)A8wHGF-j~A3!Y}PBrnAe9A zW$bSY{~RXcUmtF@F;~;E)Ff6Nj()P7z~la6SPYw5Rm1%P=@OMD zo%CpeMAZJq`PK^KHl@0mB_z(GfU7o7428Ir^Lr;on%@R#5}UKEZ@Gt;VpYl)OEk5r z`MD{TvPi~~lK{5nim|cott$V+mfn{wWob-=Z08drJ$ySxiP`pNmwyd)AvJN{R>K1g z%7RTr=l;7jDT_^?c8J^8F1By;K!u|!rSkV0aPjO^i|~A`pI7}pB-R8ncq!m!l$TD` zs#|D`uqNv5DcTU&r9VzQBny9I!!7e;KUGp;wDQ#%6Gil#P%Uu~Q>oR-eD!5G zVG~v^SKH>um2ouQxOo-pCwemliL!wpgKdL+vgrnmET=eR6lry5`jT#Y&A7-l_*~^zN2?>Ek62I%|D)4Sn)zF~Fby|Voq4xY_HH?Xg zVPj{vwjTBBIKQ}9XmP7nFMiGEXa?*efxf`i)fK3NWo5&G+6fdYUkpdRKH57wW1^t} zZvG08SqEc~nCK9alIkfdSNS}9gE9q(5P*jR0RcgCb29-R9xp#XJuWZ~Tie^yR8`HH zIiQjP$^(Aq!;1R)bWY0zH;5y!R1**o00xn;;jj6*fSnT%QUiZVLBZFALY}Um!~u8c z1qHIJ^495x1Qqw|Ln0g;V3}B`N_XY{2%-=0r2HpVG&FZ~bW~Nv{pZ(kbK`oUY?yK3 z2HX>YEK)~XyI3{Uve(+c&=7bysnV%Eiqar=3a9-33}#XCrIzLOCN&i10d==62r+Po z=%-g#t#K!#?yW-a-t7Qm8erCWuOR$5`GgpBU0ga9x@#(0A)-;t?K{FySe-LNB$8`( zH(Z_9kdbBiEwR~9-g!&>=w}m0t4>I%r0;7m)#|k4ml(g=$vt@ImIK({yf{-I77LQ{ zaM%yft^2WbRTVLqu1BWzam_XN^EQYsLZXP19hU`6Nag7o3>C-hx~?BqkLA^*LMF7( z^=;(eDQGz6BJeV5aL+m)2+vA5lT>_u^u05VkvwU$k;KHSzoc#bs7)t@Y2&O&P{1H_ zU505_S0ZcEB;gfeRNFTTO=U>8Gv@$H?SutT;mC&>yyao$Dyo#? 
z&8pkCxN1E~Ju03S>WsHmu{yr)!u*8As2)~#MM(>2{)}YBS-dcEre|Z-ansGEPs1rL z3|DCaGj?QTz@_zIJo?~gwB7!_G~{3v_W_ajv#*<+crkasz3!lHmB|VB9`&hT{i^w& z5J>|#N->BPyY`Wrot2Q5i*SqSS9*kDhQuUy^Wo8=guaClB&eKUE(?Zx7Y!kT5QYkJ zy&IQzU?B@ZTQEg5jDBsNA%2W~4Nq*s=;OA8(ePK!!AmaA9M0DH71gTi3vXVlV}|P9 z{Ah%11HuQa-mdJ^HatAW!^dl(7s~%0L|~YB`3&&g`wF)8E26jvuoQs3ko(~*YpHs2 zqK3=+pA6h*zyA=v(9zYkW+7Cii%(2U1nwfh=m%1(1#pMMtt}_uQUFZAGlWvyL3TnM zK}}1$*D`&6eqN)+2v+*x`Z}zuo13_#B;3EeLtK2D9<1M;%}qkM8Q`)2l*lk-Wa+>K z3Qnt(i=TgCW`_ISJAhJ3ma2nAE)}!6wY7D0|gCY?D}}#x8`}P z^#+jvh?k@@KhACD$}b6P5$2ZE$j$CVSylPL$`&!n3S{gqScIobm*c(0ePjBfv~qn>E{}j#%7jwvu1>$)62;fkh(u-+_lW(V(xR;B(U`DP z_JU+@xr%%;vm;-Q)m{Tx=oN-T4j&bFeHMavV%2GgYRp?p}LNT`%pvFKs#&yI8*2JM0?LM;`krK{k# z-d{}Z_chjira`wV#fDp`xtf~lqzh2@hDc@QxvJpcnKfc@)Ir$>u+x-7ll3ppX0B{H zhJLHLR5=!N84{cYgR`u3q-2Keq}w#&<&a7&Q9(-~UOa!Zn*jVR zNPC{w%24rwXw02kpZlYV!0+7}}sv4N9oxQ3k5K|po`9lm|q0@_CWBFV!uCa`;$PCO;bPZ=PZGbBPd9i_==ORwKh?kJB ze0Uk5Pmc(!8wL$)P1;uG35-qOM^cSSlZvA&Fdn*uiubA;TWv<{DF0F>VWh&+QgjrQ zs_N>Y0q$F79JM$)T3R65cR4A7@~FtmZ|>|Uk)UZoTE?}%M*-*guTWBZ4nV=f$n$|& zJYZxwH8mCZ$4LB<J0=Vq^!ig-Z zM%?%Q3>{v(BOwKEbO*oOEZG3Zo{w)?m?RWX1O%??w}gazKun<3>>L}L3NCU0?r>lP zy2WIS*uN978&*UG=L=Br0&1r0?6rV;BcSKF)Z}F0Nz0+E24QpBV5^C&Z$vBSJir$LB%&Z|?Jo{vTJvjZ00c5peAkdK`LovD_mQ_z7}3_& zp+36Up4Q2y#MZW^uy8U#KU^}5_wNvKU{=b~ z*at2C`S6rjY55!_D>?0g$71Twoj=zT z>c z+zm^9n@0=NzOeCm@Nf${0p)MPw;nSY}>{ui(vSOoegVT5_S6C%p>Zh)yg=ut* zG>Uu~c_jZ`VsW`4q3HV@8ZNGD&wSNOOj~NKJ)ZIaUz~cAt?b?V+tU0YP^-au|3sRh zD5Xj5k$dJ>t1is7@_D3 zyZ9^|>QCulV}&gWYyOlHEW_y5`Db z1S@6{Vy~~_aNHN`aw0c>7w&eQDL}MB@GrSt#QtdBgpw8*xvO#;oVt!~)3JmeqEALF ztjB@d@rdWyr-$f)>XO8u3Ck;$Sreyby0Gjao2cJVYVtTxY)20fz(u%RC+9+xAbmaB z@m&E_Gas^?Yy2Vb5P_7*QM?7#G(>Q6zzK)OJPhQ$1yu*e@1IY*7-oPbm z2Yo$QdVfxkD+wV!f=~pE2@R47E^KTCjOqIrH$g|FLOonlhfB-GQ>q5+-7cssNrheZ z$4`yy;1~B-aJo@ypsP2oBsU4#qkb=NBzM!6bT+<79utF!^~P!bdU(tFNs}CUDUR) z*qz!6x-PunZ}~;T?F@lmb2rzjb|g{vRm$O>r=*Ve?nN)6g&{IvVJ^lLKP* z=4L34j)xpjy&y47j*Mh>KA!jU^IKll6e?Aoo8_=8WMyKq0W@8E`;BA80s8*=b0;v; z3*;JV)k}fHi%6Kx?U0v|p$iQ20>fnW%dLPskdKc~LjxDU;xQu-F$HPq>CrGTId5_V zkoExaM*x##Qh9(O-Kj>0y@rN{U%ya*$zGuNQBXkhJxch?7a$2RI^?=C5Bw|u5DN10 zKz;$3t_6&$N=r)tHG1>fD2n@s2Yl8~K&SzQLIENIh`9rVCAnUg@B)ZH0M5h5&mRUe z4mft_PXR*k@i8&DXo!Fyx^(L3im*BTj44}qxKqUA+4=b=#MGt5ML;@^nAh17$hHDt z=mTVKoSp0H>s^5S1Heh1NQgXzv_!sMRg=D&Wo^?^eFx{M_Dyh()%kmm5WqPWqR@64 z)eu%c!;rPcx&_9%BQfbPA$cw&wuplkX2~mbGK#VqvWmpB6&87#8+n(q2Y570jj9%% z?N)Ce7C#FzCS0Dh`X0%oF-|)lE`e(V7g>4evNBJ?%N7Mq9vhZkIGIerBEPriAx*w3 zJ6Tc|@+JGOC+lisd^!(JtErS#Jd;8j2se|G4Yl)L*7I7a6T<*ATFRE6?<3Oev z#z(&spzat4)9cvcKNt7O3v1^>f9@Cc-!YDQ5yD8hA-#M|h%@AbODNCgTC^g)y2_V=B!!Fe9|c3UPA9^Nu#S7>1g zr^O*lEmjgMC?COFsJocS=Qkr5y!P7}UfmBzLz12P9A@Vec+n=yc6%S14bBk`N6CZH z(YaM<Kjuc54Rz8?P+KBp~D>bGA{-?p&la{blqF><^ zzqL1U{`u9fJ2d$6D}n`lfX9YR^Ki6%{?2L4L-N7Pb=InKl=$wR>PuBDPXm-AK{GE?k7;J{GCo`y5h}M!#_?!^1X+g~mq(#ub0J%ajYvjTsS5>f3kN+fM5Syk}+vlX~x#4!Z<+ zC>dT&MNiG6#=hmlECjCi2x^frei&EZ>3WB2jlf?Le2s0g#Wox$I}bmk_wlPQMi#-Y znDv-*@+CuPP`(})jUKEbyLul^xG-CQFNr0a5sV-V>&E*OcuZV$lTMN#4m2NY`!t5R zC;Z*+_gsjgLP4aq%m_N_)weG`@UMIXN8Bg5-at&k5P1oHtq0CZHnXn=TdY%^`3vJB zho6-?IsL28;IX@4NCz3cMn53!epTU{iet^u7Lc5Q*~1yObiae1No8^NuW|cB<{A2z3+e8r-*=R%l%HgjR{%H(00>ViBRe}gEp3sV1)#wI z{68R)4F}|C01JzdkdW-GEFgY?-QL}utp_w#7626Vg+l@a?=|+d6&0rog!0tr!R3kB_;F*6pm(Q2S6xzdYT3hA!RZC8L?Eg%{Bj!F_pnOx&lk$?CA zl(P-h09CEBl9FQ}0}bSi0R2mV3IeqyAZK&bCAa6q(5q^7P)<_R za*8mf3bly~rwiqSy3{vqVP$3KKa_5VKL;ZtZzj-n#(-dxoi!-9QEc%ss{z7M2IgG+ z71Mi@q{X||Qt#Xe=P$ZSY$zcmC)pK2Vzs2NL}m6(e)v2=Zv}dDB%uC1ERW`SKCj%x zaz)AD+sv$7+#kr22D@SSirY{zCZ&-Wzn>*a_*ZNnsV95SG2f|EHe)T8{yR*yeN031 
zn;9gbA=iL4Zc5F3x}niu+O6|5#l>S5#2&tUg{!x>AQvyk1-d#|8rQw?`-ae0wnWYcD{U4TGFizUA?!9=ne zFQ*Zou3BjWEn-@>X{&v?0)w{+m+KE1`UnIGHB@p&me1zRgZHR0n>|NgYq=S3KPxeP zXB9|SUaPXY8XZg9okbFrR>v8b(fS8~nQmcT4sHZ#KAh``dCHi6TI&cl-9usG;4b6}|Cv0+?CTHf?6@Gtj6m2Q zeo)ePZN9u42-SC8s;@vMN9twqaJl2f*A8ds!-$-+LbQ&Y3?y(6A;F43!aM9xH<{f| z{CV_uk`@BUO|V2V0BYxt1;@X!!DbTX%Au-Lwq#71SjkW-WCfe?&;tBi*jEX=WJhm8gLC$bj|o_gXg?ByR?A z&x9H7F%#t`c_;TKh(wo*OBlw|U(xlAmro$WNHx1tyZ`rfWW;`doedbe@S7V~gtXxF z5$Ax$r`IqM-QW?$wO4apT4zcMxh@@BuVKw;0@l`bq)O|E@;aQ*Azw`GTCjJE8nZDP zh|~OZZ4uzMyAi8@our^=M~{y=UUAGCmaFsWGYseKG{ybxR}<~3mqjBV-hBkCzH0AE znLS?XrB64C+dI^;r4I1p70U>OcO7VAHW~u24Q`45%TVQU@$fhUikenV8dwjzY17sSbcJ<9+QI zTD?z~S+(su6$?w%b0S=PW=TnjUU!gIWZuOEN@^-Uue%VCEkKbTO{E?DBjwl?`&Hz` z1=3Eeaj4RClkB4X{S6-e-*^KxOM!Z>s>(s8yE-VkTI1V2mV`%8 zeHg<2BE?yn>x9Q{GfF-e9OzsbErAk#@^TV37XC-;7c}c{vNQ8b?S_M*0%xj;Fmiy? zm8tq4T;4mDmgx!pMp`Ssk7q0`s}ua6OwdLwKlNW1y}^WGMhth-BysM7Z$P}!!m56Q z(5k1}owKfT^@Fhi)jqB?;7`SHOosZuwZU*2fXx8pyL_vvSk&TX%$=-GjLw%z0RHdf zRI6R?VsD=WsILQ={Lr5%fkU%9uR&ITe$M*(__H(o=Q|_-%*`2*uK-0f;MQB6AhPTv zkKnWa93>P2GW~qF2PpJ+#ftT|?Nv3~KuGEzGEY9aG3*^;|IJ%sYdyn^v!Lu{yz%6T zrAYt5Fb#(rv)YPyYd0$BMxR?ofSBvjzb=r9B26sO)NDDLB2G#R68N3kklM%h>FZTSm z7vLEvwy2VfYa&jJ;TUr43hDf4;sc(Heggm26mcW|pAw~`xP-*fOPeI%1jfl(4`{p- zw6ltehyZnjPJG(fqNlJ|erIDj@`XvLK52lpj_7TfQ8cwo8!61V3X+;VGBJC_;rMEb z^5YHT-*g!aMbW#}yLC6L{eK~e-!YuOPsAnOABDH1ZG z`7?)3G**_e88&0RW%BOd`u!S1pg5&oo2e!d7L@D3IKp;(b<68nQklSHAt>X}@m9 zdY_mHd;MU&j`u4yBj0H4R*_*`@HP>&9V7XE~oSDlf(PG!8tZLS_dp0Uke-$+Eq4QaJpD8gj_0HbC>9 zbST5wxk_$iXnU;pLjf=phURTkqS}53c5_6(8pADX?8mfryBx@DC%j!Hi z23mGJPws=6I`gd9k=ot$6SsJ6S}G;gZ+e+#dxVZGq2Wyi2g~Q=?ylStJ9yuz>mhf~ z(I?+yh3xVJNy@=q{Rb}CPiwyjk@1=tF|D-RQZLhGFxBwdPMVYbM=iwF8Xg)0k$TIX zP3^A_^KIOo12kP2w0f~C3L$5x1PniU9IdRl zw6X7`tyDcFaAu_w+m)@oP5Q!g4Yn%pC#L$lxWMOyp2eJLO#0ViN*93D7qQH^LX>1m zIVs_El@r+J6r6$Z_7-VEFA7JG+hzeX-mzO6z?}8f!8E?l9_AxX+_o)5%O9DTN<11fyyw9llJN0-bKRq?o z|32y&E=aSgl=r>O%N?@SNiwAU-1WK&um}wIEvqGBdufe*IXqH~^*ZYh7cQ0AAOF!z zjL%Yc#Ok1uQcx#b`7po89uvp zfD{ENnkHne>Qi0IDe};O8DhinP1V15_xEeXTJ9YkloX7N(6PV7$raD|i(8S)sVQiy z!d1zaDrS~X{JI*$PFZz_sMy8sN>FHN`(f#<#n?Y|Lh?q|IF0Q0f3&rVNGI@EKb*aR zv^Tm+xM|snTo=0NJU+FY8C$70zB>O~hYq;>`9ky$$NrJ7f5o|gp%9Aix6AFv`x}B0 z#0xf>lXQ-tv!&kj*|KNEyW?c1Q(_>Y)OLT*?IT~<WxThVvhAo9lzz zs>e$#wd-MN>7S8uGw-LWK+n`7%=WYO>f*4>NsK9hr_I9eob*~%CocjuvXzJZgG!>2 zl<}WB>m#oE`LEY-yoTjql$;exk8?Wp#!r}96I>wv3z|-IZ4%RzcOgO_oJb#q_`sVkugAo*ZBaIexWRbjV?~O+qS|E!HJ98Y0ePrpJIf zMV(nF_Uz<&QGUd!z`Hr+gqkmjyIybfqL#c^QX)SvAQBzt*xwZ|`tNmwmQ@Xx0Za1# ze!8$A&!XB1;7cMKdY#6r%)M!jI-zm_J)!BfVAjcK;PdX?rtfOzlTXJf8Hs13@kCTG zvFF1~qQkCEi#DD?h6_^b*{s3TN|KJ%a)m&h{lh7Vcah9{ttN*Zh*}gOo4IO!NP>`5 zX2@!dy@PW6eFUGKKX_~HIkqkw=IN>V<{Gld*)gZE5`+!_$y?}z1f=PxZZ+JVK%A~w zi}z(!v==F)THzl@2>GVW1$O7j{77#ZS%*6yt1Yh{mR%pr?-SbfXR2GejGl3##*9}* zFg}XEdD0wKWcgF?#vIJ-_dH;9molaZ}!$>%h31A|GOGEeOfQKJt+h>2!{R}L2jsm(1&;dPb5c70MDs<@aMSS z;lN!X6S!XbH48qudycK@A+~BqhZDpI^Dbed8i(O z9LH!b3co=@D=08!bT}omLbNgeAm3av-<5_+;&zd#dw!+-Z#yw!_-EJWGbH&f7m08h zDy5CkPw+Y)%(QEmyfCgH1-LHEVdw4sy4y3RLH{Xed3RVdm0~3Ge6@d-Tiv(f0+mYl zI2#O~6TRlpkqTdN5I?j#Nwrz!G4HJTeJPxJwA)H?6hPSk;p|R|A|6sUa4b+tc0xS+r5w2_4mEG@jF?`!GKHXLk?5?<-px5)=&0o^i9>%k;X@{Q1I%?g`Iye;1E;n=J zF3^|mV5!HX38c`3%EvcsT*p6nKSD&U*KYdfCO$;{<4ggl z5+WO$2L{yfCRj=wj7p}}V<~xHPH&eN-Q2PwJ|+%^*7Xkp?Yo0>AJ1EnUZP!YLlL&9 zRym~@RN@S&dP~)u=*v2JOjO#Qv)8Jt%iAwGG`^3nJ*U@J?jQE0GyPvDmHdR( zfI|gj)P>#)bayhB?yY%^U^_P|Ov|>rMJHr_j&p1O zT0Z&-#3iH2vmg^UN*GEnw^(#(KS;44$TmF_DVN7w@YoG@6K`VWcB{>(tD_rHnCbO+ zn$7nY0J~W%+knQl(ju}1Jf{dakbY!CZ>`??e zjK9nF?^C1UwYm^?^z8B^g^GxQXJ%qjKA~t)J44a}-Q0lVMZ&;qwOhHpw#dt+5V)L= 
zwfXtxE-ux{Iz?^M>FMbJ)>@>@1VoBEfT#wp1cC^!-Q3x1m1Uhhyhy*H5(g#D6Jx2X ziw%({w^u0dNGrHJ-!sghq;&M-&5rs9qIac&jh@T|pTz_-`tO)pUifq^q{y zdwvgJI3;`To3}9^pcZ}lYyN2||DA)!;;XZ00FZx?R+%4AkwCE)#sdg;t^fVq=H}+y zzGeH^*vU#40mVLk1yktp`1#H#H$$;Hrw7`V7fGE zgTQX*>b~E;{x?(MA^Ke2fu1EDqVd^Lw8VBlM6V=%=lKh#h*xj_-FE+t#gbqRD>VS% zb_JbxI)HxI3otDtu^Y&K_~3DUnqR97^iasV)JN2mlz@kHXmBtsJsq(z0Q;%(vVCDe zL_}m^cJ>-D6#!N8R#wP0fkk6=Z4E$e$Ez;@a{u#Z%;&&chKGq9+}v6n*RUUvl9EzT z6d28pjd2MG2yk(!%o8dpDYf_XYyc9Cx8maBK!AlpZ5$lRBO)Ri8a%bNe~4!T7qe@Z zFC(@_YN9Z)v8|D1s=v;X9NZioJyBN`ZodK47bsu+hYuYb9hrf!EtOkQR~Hi*i3#B0 zrABQ;-MzgbAtAf&9M7L;Nkvf+5fK65N9~vTCn;UP004~ZMd~d1s-H|u9zR1Sf+`W0 zmyqe=*>qudT2Zg~!lcOmE!h7pS+h8w1As*Y;uhW)I^RtfASM96w4}r?<#c_0orfOp zOJ{qhy`vI8wba7#^P&yPmx*?06Ldh45j$r z-(H53G6HY2&LYBz^U;s^i&id}| z4v1zywaOOtoINGP#TOP9(11Yz0}F2(f=5U=#eP6Q#;p2Y?+TDt0GrpDFD2@j}BJUk~yN5E1dQ?=^qHIfN-cXx*ssQDjv-~hcI(5?V+7`Or`)~PAp zd~YCvV*yf;>M^-&b(XleIKU`@+@IwJL<@G=eA%=8{Va5J;w74TIyxJ`XPHwTOkZ7j z*x1Q?gKNy>LxusJ>b@#Flm=()qaWR|8F7v8!5#fo*bDaBK{+I`CFEN z76m7cQ3(iaeiH=>Pz`9UaYs;$*v(I1G{UE37jZw-m#qQJMnJgNglY}}cOfWvJ(er2 zRjMzuqzim0;HzkEX+f{i^}RXUIX*sS)hgw&nI`Z!S(TJc1*{@$0KN|NJ^O`vlPquG zs$T%jf3n(>o16Rc<;!z^Noi>yY&HQuUPeY{Btvw90%(m5fK3F5h0|6%K)pJwNbkH~nUJ4|XHRqDMCW0lkN$h0=T<3w<6^eHi$mxxjmsmhwl z&)3$OOR9V}*+$--c-W?FB5@;nI$aG+Bd2dPp_Q*;do`*>MMVP9Ndx9fml71`oK0~1 zEkr~7saFfp^{5J&V6NRuqVkq!t~6rDJiKjrCgT#{wj57)82!-_gwxFFW~~(u~PMc yJs Date: Thu, 16 Jan 2025 12:04:49 -0900 Subject: [PATCH 528/589] adding inference state back to inference engine --- .../torch/sharded_inference_engine.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index ffad807ce..dba037aba 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -8,6 +8,7 @@ from concurrent.futures import ThreadPoolExecutor import asyncio import uuid +from typing import Optional import numpy as np import torch @@ -102,21 +103,19 @@ def sample_wrapper(): return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(sample_wrapper)) - async def infer_tensor( - self, - request_id: str, - shard: Shard, - input_data: np.ndarray, - ) -> np.ndarray: + async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray, inference_state: Optional[dict] = None) -> tuple[np.ndarray, Optional[dict]]: # ensure shard if DEBUG >= 4: print("infer_tensor called") print(f"shard: {shard}") print(f"input_data: {input_data}") - print(f"self.past_tokens: {self.past_tokens}") + print(f"inference_state: {inference_state}") await self.ensure_shard(shard) + if inference_state.get("past_tokens") is not None: + self.past_tokens = torch.tensor(inference_state["past_tokens"]) + self.request_id = request_id if not self.request_id else self.request_id hidden_state = None @@ -144,16 +143,20 @@ def infer_wrapper(): else: model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor) + curr_inference_state = { + "past_tokens": self.past_tokens.numpy(force=True).tolist(), + } + if model_hs is not None: # model_hs = model_hs.detach().cpu() # possibly make this into a tensor that has past_tokens also # to pass to node, currently only hidden state is - return model_hs.numpy(force=True) + return model_hs.numpy(force=True), curr_inference_state # model_logits = model_logits.detach().cpu() # token = await self.sample(model_logits, TEMP, TOP_K) - return model_logits[:, -1].numpy(force=True) + return 
model_logits[:, -1].numpy(force=True), curr_inference_state return await asyncio.get_running_loop().run_in_executor(self.executor, infer_wrapper) From 6d29ba65c5a74c612e1a227dfeef040396aafa25 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Thu, 16 Jan 2025 12:27:50 -0900 Subject: [PATCH 529/589] fixing inference engine selection by adding torch to supported engines, fixing inference state issues, testing server nodes --- .gitignore | 1 + .../torch/sharded_inference_engine.py | 2 +- exo/orchestration/node.py | 41 ++++++++++--------- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index ef39e5f06..681562c3e 100644 --- a/.gitignore +++ b/.gitignore @@ -176,3 +176,4 @@ cython_debug/ .aider* exo/tinychat/images/*.png +.vscode/ diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index dba037aba..cd6c86ebe 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -114,7 +114,7 @@ async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarr await self.ensure_shard(shard) if inference_state.get("past_tokens") is not None: - self.past_tokens = torch.tensor(inference_state["past_tokens"]) + self.past_tokens = torch.tensor(inference_state["past_tokens"]).to(self.device) self.request_id = request_id if not self.request_id else self.request_id diff --git a/exo/orchestration/node.py b/exo/orchestration/node.py index 10b820d76..4e1ccbdef 100644 --- a/exo/orchestration/node.py +++ b/exo/orchestration/node.py @@ -17,6 +17,7 @@ from exo.inference.inference_engine import get_inference_engine, InferenceEngine from exo.download.hf.hf_shard_download import HFShardDownloader + class Node: def __init__( self, @@ -43,7 +44,7 @@ def __init__( self.buffered_inputs: Dict[str, List[np.ndarray]] = {} self.buffered_partials: Dict[str, List[np.ndarray]] = {} self.checkpoints: Dict[str, Dict[str, int]] = {} - + self.max_generate_tokens = max_generate_tokens self.topology_viz = topology_viz self.default_sample_temperature = default_sample_temperature @@ -98,6 +99,9 @@ def get_supported_inference_engines(self): supported_engine_names.append('tinygrad') else: supported_engine_names.append('tinygrad') + + supported_engine_names.append('torch') + return supported_engine_names async def broadcast_supported_engines(self, supported_engines_names: List[str]): @@ -106,7 +110,7 @@ async def broadcast_supported_engines(self, supported_engines_names: List[str]): def get_topology_inference_engines(self) -> List[List[str]]: return self.topology_inference_engines_pool - + async def process_inference_result( self, shard, @@ -144,10 +148,9 @@ async def process_inference_result( self.outstanding_requests.pop(request_id) else: self.outstanding_requests[request_id] = "waiting" - asyncio.create_task(self.forward_tensor(shard, forward, request_id, self.get_partition_index(offset = 1), inference_state)) - - return np.array(self.buffered_token_output[request_id][0]) if shard.model_id != 'stable-diffusion-2-1-base' else intermediate_result + asyncio.create_task(self.forward_tensor(shard, forward, request_id, self.get_partition_index(offset=1), inference_state)) + return np.array(self.buffered_token_output[request_id][0]) if shard.model_id != 'stable-diffusion-2-1-base' else intermediate_result async def process_prompt( self, @@ -214,7 +217,7 @@ async def enqueue_example( self, base_shard: Shard, example: np.ndarray, - target: np.ndarray, + target: np.ndarray, length: 
np.ndarray, request_id: Optional[str] = None, train: bool = False, @@ -227,7 +230,7 @@ async def enqueue_example( if request_id is None: request_id = str(uuid.uuid4()) self.outstanding_requests[request_id] = "waiting" - loss = await self.forward_example(shard, example, target, length, train, request_id, 0) + loss = await self.forward_example(shard, example, target, length, train, request_id, 0) return loss async def coordinate_save( @@ -258,7 +261,7 @@ async def process_example( self, base_shard: Shard, example: np.ndarray, - target: np.ndarray, + target: np.ndarray, length: np.ndarray, train: bool = False, request_id: Optional[str] = None, @@ -303,7 +306,7 @@ async def _process_example( self, base_shard: Shard, example: np.ndarray, - target: np.ndarray, + target: np.ndarray, length: np.ndarray, train: bool = False, request_id: Optional[str] = None, @@ -322,7 +325,7 @@ async def _process_example( self.outstanding_requests[request_id] = "preprocessing" step, _ = await self.inference_engine.infer_tensor(request_id, shard, example) self.outstanding_requests[request_id] = "waiting" - loss, backgrad = await self.forward_example(shard, step, target, length, train, request_id, self.get_partition_index(offset = 1)) + loss, backgrad = await self.forward_example(shard, step, target, length, train, request_id, self.get_partition_index(offset=1)) self.outstanding_requests[request_id] = "training" partial_loss, grad = await self.inference_engine.train(request_id, shard, example, backgrad, length, loss="back_gradient") self.outstanding_requests.pop(request_id) @@ -338,7 +341,7 @@ async def _process_example( self.outstanding_requests[request_id] = "preprocessing" step, _ = await self.inference_engine.infer_tensor(request_id, shard, example) self.outstanding_requests[request_id] = "waiting" - loss = await self.forward_example(shard, step, target, length, train, request_id, self.get_partition_index(offset = 1)) + loss = await self.forward_example(shard, step, target, length, train, request_id, self.get_partition_index(offset=1)) self.outstanding_requests.pop(request_id) return loss except Exception as e: @@ -346,7 +349,7 @@ async def _process_example( print(f"Error processing example for shard {shard}: {e}") traceback.print_exc() return None - + async def process_tensor( self, base_shard: Shard, @@ -406,14 +409,14 @@ async def _process_tensor( try: self.outstanding_requests[request_id] = "processing" result, inference_state = await self.inference_engine.infer_tensor(request_id, shard, tensor, inference_state) - ret = await self.process_inference_result(shard, result, request_id, inference_state) + ret = await self.process_inference_result(shard, result, request_id, inference_state) return ret except Exception as e: self.outstanding_requests.pop(request_id) print(f"Error processing tensor for shard {shard}: {e}") traceback.print_exc() return None - + async def forward_example( self, base_shard: Shard, @@ -455,7 +458,7 @@ async def forward_prompt( raise ValueError(f"Peer for {target_index} not found") if DEBUG >= 1: print(f"Sending prompt to {target_peer.id()}: {prompt}") await target_peer.send_prompt(next_shard, prompt, request_id=request_id, inference_state=inference_state) - + async def forward_tensor( self, base_shard: Shard, @@ -485,7 +488,7 @@ def get_partition_index(self, offset: int = 0): current_partition_index = next((i for i, p in enumerate(partitions) if p.node_id == self.id), None) if current_partition_index is None: raise ValueError(f"No current partition found for node: {self.id}") - return 
(current_partition_index + offset) % len(partitions) + return (current_partition_index+offset) % len(partitions) def get_current_shard(self, base_shard: Shard, index: Optional[int] = None) -> Shard: if index is None: @@ -616,7 +619,7 @@ def on_opaque_status(self) -> AsyncCallbackSystem[str, Tuple[str, str]]: def trigger_on_token_callbacks(self, request_id: str, tokens: List[int], is_finished: bool) -> None: if DEBUG >= 2: print(f"Triggering all on_token callbacks with {request_id=} num_tokens={len(tokens)} {is_finished=}") self.on_token.trigger_all(request_id, tokens, is_finished) - + async def broadcast_result(self, request_id: str, result: List[int], is_finished: bool) -> None: async def send_result_to_peer(peer): try: @@ -651,8 +654,8 @@ def current_topology(self) -> Topology: def handle_stable_diffusion(self, inference_state, result): if inference_state['is_step_finished']: - inference_state['step']+=1 - progress = [inference_state['step'],inference_state['total_steps']] + inference_state['step'] += 1 + progress = [inference_state['step'], inference_state['total_steps']] intermediate_result = result if progress[0] == progress[1]: intermediate_result = result From 38878cbc986b1acc009f5ab5f46e6cb5efd74525 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 00:25:24 -0900 Subject: [PATCH 530/589] adding in tracking of selected exo infrace engine via env var EXO_INFER_ENGINE --- exo/main.py | 53 ++++++++++++++++++++++++--------------- exo/orchestration/node.py | 5 ++-- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/exo/main.py b/exo/main.py index 6cffbefad..e7980baf6 100644 --- a/exo/main.py +++ b/exo/main.py @@ -83,6 +83,8 @@ inference_engine_name = args.inference_engine or ("mlx" if system_info == "Apple Silicon Mac" else "tinygrad") print(f"Inference engine name after selection: {inference_engine_name}") +os.environ["EXO_INFER_ENGINE"] = inference_engine_name + inference_engine = get_inference_engine(inference_engine_name, shard_downloader) print(f"Using inference engine: {inference_engine.__class__.__name__} with shard downloader: {shard_downloader.__class__.__name__}") @@ -127,7 +129,9 @@ elif args.discovery_module == "manual": if not args.discovery_config_path: raise ValueError(f"--discovery-config-path is required when using manual discovery. 
Please provide a path to a config json file.") - discovery = ManualDiscovery(args.discovery_config_path, args.node_id, create_peer_handle=lambda peer_id, address, description, device_capabilities: GRPCPeerHandle(peer_id, address, description, device_capabilities)) + discovery = ManualDiscovery( + args.discovery_config_path, args.node_id, create_peer_handle=lambda peer_id, address, description, device_capabilities: GRPCPeerHandle(peer_id, address, description, device_capabilities) + ) topology_viz = TopologyViz(chatgpt_api_endpoints=chatgpt_api_endpoints, web_chat_urls=web_chat_urls) if not args.disable_tui else None node = Node( args.node_id, @@ -151,9 +155,11 @@ system_prompt=args.system_prompt ) node.on_token.register("update_topology_viz").on_next( - lambda req_id, tokens, __: topology_viz.update_prompt_output(req_id, inference_engine.tokenizer.decode(tokens)) if topology_viz and hasattr(inference_engine, "tokenizer") and inference_engine.shard.model_id != 'stable-diffusion-2-1-base' else None + lambda req_id, tokens, __: topology_viz.update_prompt_output(req_id, inference_engine.tokenizer.decode(tokens)) + if topology_viz and hasattr(inference_engine, "tokenizer") and inference_engine.shard.model_id != 'stable-diffusion-2-1-base' else None ) + def preemptively_start_download(request_id: str, opaque_status: str): try: status = json.loads(opaque_status) @@ -186,6 +192,7 @@ def throttled_broadcast(shard: Shard, event: RepoProgressEvent): shard_downloader.on_progress.register("broadcast").on_next(throttled_broadcast) + async def run_model_cli(node: Node, inference_engine: InferenceEngine, model_name: str, prompt: str): inference_class = inference_engine.__class__.__name__ shard = build_base_shard(model_name, inference_class) @@ -214,29 +221,33 @@ async def run_model_cli(node: Node, inference_engine: InferenceEngine, model_nam finally: node.on_token.deregister(callback_id) + def clean_path(path): - """Clean and resolve path""" - if path.startswith("Optional("): - path = path.strip('Optional("').rstrip('")') - return os.path.expanduser(path) + """Clean and resolve path""" + if path.startswith("Optional("): + path = path.strip('Optional("').rstrip('")') + return os.path.expanduser(path) + async def hold_outstanding(node: Node): while node.outstanding_requests: await asyncio.sleep(.5) - return + return + async def run_iter(node: Node, shard: Shard, train: bool, data, batch_size=1): losses = [] tokens = [] for batch in tqdm(iterate_batches(data, batch_size), total=len(data) // batch_size): _, _, lengths = batch - losses.append(np.sum(lengths * await node.enqueue_example(shard, *batch, train=train))) + losses.append(np.sum(lengths*await node.enqueue_example(shard, *batch, train=train))) tokens.append(np.sum(lengths)) total_tokens = np.sum(tokens) - total_loss = np.sum(losses) / total_tokens - + total_loss = np.sum(losses)/total_tokens + return total_loss, total_tokens + async def eval_model_cli(node: Node, inference_engine: InferenceEngine, model_name, dataloader, batch_size, num_batches=-1): inference_class = inference_engine.__class__.__name__ shard = build_base_shard(model_name, inference_class) @@ -251,6 +262,7 @@ async def eval_model_cli(node: Node, inference_engine: InferenceEngine, model_na print("Waiting for outstanding tasks") await hold_outstanding(node) + async def train_model_cli(node: Node, inference_engine: InferenceEngine, model_name, dataloader, batch_size, iters, save_interval=0, checkpoint_dir=None): inference_class = inference_engine.__class__.__name__ shard = 
build_base_shard(model_name, inference_class) @@ -270,7 +282,7 @@ async def train_model_cli(node: Node, inference_engine: InferenceEngine, model_n await hold_outstanding(node) await hold_outstanding(node) - + async def main(): loop = asyncio.get_running_loop() @@ -279,13 +291,15 @@ async def main(): if DEBUG >= 1: print(f"Model storage directory: {hf_home}") print(f"{has_read=}, {has_write=}") if not has_read or not has_write: - print(f""" + print( + f""" WARNING: Limited permissions for model storage directory: {hf_home}. This may prevent model downloads from working correctly. {"❌ No read access" if not has_read else ""} {"❌ No write access" if not has_write else ""} - """) - + """ + ) + if not args.models_seed_dir is None: try: models_seed_dir = clean_path(args.models_seed_dir) @@ -295,7 +309,7 @@ async def main(): def restore_cursor(): if platform.system() != "Windows": - os.system("tput cnorm") # Show cursor + os.system("tput cnorm") # Show cursor # Restore the cursor when the program exits atexit.register(restore_cursor) @@ -318,8 +332,7 @@ def handle_exit(): await run_model_cli(node, inference_engine, model_name, args.prompt) elif args.command == "eval" or args.command == 'train': model_name = args.model_name - dataloader = lambda tok: load_dataset(args.data, preprocess=lambda item: tok(item) - , loadline=lambda line: json.loads(line).get("text","")) + dataloader = lambda tok: load_dataset(args.data, preprocess=lambda item: tok(item), loadline=lambda line: json.loads(line).get("text", "")) if args.command == 'eval': if not model_name: print("Error: Much like a human, I can't evaluate anything without a model") @@ -330,11 +343,11 @@ def handle_exit(): print("Error: This train ain't leaving the station without a model") return await train_model_cli(node, inference_engine, model_name, dataloader, args.batch_size, args.iters, save_interval=args.save_every, checkpoint_dir=args.save_checkpoint_dir) - + else: asyncio.create_task(api.run(port=args.chatgpt_api_port)) # Start the API server as a non-blocking task await asyncio.Event().wait() - + if args.wait_for_peers > 0: print("Cooldown to allow peers to exit gracefully") for i in tqdm(range(50)): @@ -346,7 +359,7 @@ def run(): asyncio.set_event_loop(loop) try: loop.run_until_complete(main()) - + except KeyboardInterrupt: print("Received keyboard interrupt. 
Shutting down...") finally: diff --git a/exo/orchestration/node.py b/exo/orchestration/node.py index 4e1ccbdef..683d56804 100644 --- a/exo/orchestration/node.py +++ b/exo/orchestration/node.py @@ -1,3 +1,4 @@ +import os import numpy as np import json import asyncio @@ -98,9 +99,7 @@ def get_supported_inference_engines(self): supported_engine_names.append('mlx') supported_engine_names.append('tinygrad') else: - supported_engine_names.append('tinygrad') - - supported_engine_names.append('torch') + supported_engine_names.append(os.environ.get("EXO_INFER_ENGINE", 'tinygrad')) return supported_engine_names From 0a985a77ca7d9bf32137d9d72a966d20b7eba1ee Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 13:57:29 -0900 Subject: [PATCH 531/589] delete build folder, add it to .gitignore, added OOM error fix to reset model at ever eot end generation, removed using past_tokens and inference_state if using cache --- .gitignore | 1 + build/lib/exo/__init__.py | 1 - build/lib/exo/api/__init__.py | 1 - build/lib/exo/api/chatgpt_api.py | 358 ----------- build/lib/exo/download/__init__.py | 0 build/lib/exo/download/download_progress.py | 61 -- build/lib/exo/download/hf/__init__.py | 0 build/lib/exo/download/hf/hf_helpers.py | 403 ------------ .../lib/exo/download/hf/hf_shard_download.py | 77 --- build/lib/exo/download/shard_download.py | 26 - build/lib/exo/helpers.py | 234 ------- build/lib/exo/inference/__init__.py | 0 .../exo/inference/debug_inference_engine.py | 59 -- build/lib/exo/inference/inference_engine.py | 34 - build/lib/exo/inference/mlx/__init__.py | 0 .../lib/exo/inference/mlx/models/__init__.py | 0 build/lib/exo/inference/mlx/models/base.py | 9 - .../exo/inference/mlx/models/deepseek_v2.py | 127 ---- build/lib/exo/inference/mlx/models/llama.py | 125 ---- build/lib/exo/inference/mlx/models/llava.py | 585 ------------------ .../inference/mlx/sharded_inference_engine.py | 40 -- build/lib/exo/inference/mlx/sharded_model.py | 86 --- build/lib/exo/inference/mlx/sharded_utils.py | 207 ------- .../exo/inference/mlx/test_sharded_llama.py | 40 -- .../exo/inference/mlx/test_sharded_llava.py | 64 -- .../exo/inference/mlx/test_sharded_model.py | 52 -- build/lib/exo/inference/pytorch/__init__.py | 0 build/lib/exo/inference/pytorch/helpers.py | 24 - build/lib/exo/inference/pytorch/inference.py | 211 ------- .../exo/inference/pytorch/model/__init__.py | 0 build/lib/exo/inference/pytorch/model/hf.py | 155 ----- .../lib/exo/inference/pytorch/model/utils.py | 83 --- .../pytorch/test_inference_engine.py | 141 ----- build/lib/exo/inference/shard.py | 39 -- .../exo/inference/test_inference_engine.py | 64 -- build/lib/exo/inference/tokenizers.py | 45 -- build/lib/exo/models.py | 44 -- build/lib/exo/networking/__init__.py | 5 - build/lib/exo/networking/discovery.py | 17 - build/lib/exo/networking/grpc/__init__.py | 0 .../lib/exo/networking/grpc/grpc_discovery.py | 188 ------ .../exo/networking/grpc/grpc_peer_handle.py | 109 ---- build/lib/exo/networking/grpc/grpc_server.py | 118 ---- .../exo/networking/grpc/node_service_pb2.py | 61 -- .../networking/grpc/node_service_pb2_grpc.py | 272 -------- .../networking/grpc/test_grpc_discovery.py | 22 - build/lib/exo/networking/peer_handle.py | 48 -- build/lib/exo/networking/server.py | 11 - build/lib/exo/orchestration/__init__.py | 4 - build/lib/exo/orchestration/node.py | 47 -- build/lib/exo/orchestration/standard_node.py | 385 ------------ build/lib/exo/orchestration/test_node.py | 57 -- build/lib/exo/stats/__init__.py | 0 build/lib/exo/stats/metrics.py | 29 
- build/lib/exo/test_callbacks.py | 50 -- build/lib/exo/topology/__init__.py | 0 build/lib/exo/topology/device_capabilities.py | 207 ------- .../lib/exo/topology/partitioning_strategy.py | 40 -- ...g_memory_weighted_partitioning_strategy.py | 18 - .../exo/topology/test_device_capabilities.py | 91 --- build/lib/exo/topology/test_map_partitions.py | 81 --- ...g_memory_weighted_partitioning_strategy.py | 90 --- build/lib/exo/topology/topology.py | 49 -- build/lib/exo/viz/__init__.py | 0 build/lib/exo/viz/test_topology_viz.py | 129 ---- build/lib/exo/viz/topology_viz.py | 307 --------- exo/inference/torch/models/llama3.py | 60 +- .../torch/sharded_inference_engine.py | 59 +- 68 files changed, 73 insertions(+), 5877 deletions(-) delete mode 100644 build/lib/exo/__init__.py delete mode 100644 build/lib/exo/api/__init__.py delete mode 100644 build/lib/exo/api/chatgpt_api.py delete mode 100644 build/lib/exo/download/__init__.py delete mode 100644 build/lib/exo/download/download_progress.py delete mode 100644 build/lib/exo/download/hf/__init__.py delete mode 100644 build/lib/exo/download/hf/hf_helpers.py delete mode 100644 build/lib/exo/download/hf/hf_shard_download.py delete mode 100644 build/lib/exo/download/shard_download.py delete mode 100644 build/lib/exo/helpers.py delete mode 100644 build/lib/exo/inference/__init__.py delete mode 100644 build/lib/exo/inference/debug_inference_engine.py delete mode 100644 build/lib/exo/inference/inference_engine.py delete mode 100644 build/lib/exo/inference/mlx/__init__.py delete mode 100644 build/lib/exo/inference/mlx/models/__init__.py delete mode 100644 build/lib/exo/inference/mlx/models/base.py delete mode 100644 build/lib/exo/inference/mlx/models/deepseek_v2.py delete mode 100644 build/lib/exo/inference/mlx/models/llama.py delete mode 100644 build/lib/exo/inference/mlx/models/llava.py delete mode 100644 build/lib/exo/inference/mlx/sharded_inference_engine.py delete mode 100644 build/lib/exo/inference/mlx/sharded_model.py delete mode 100644 build/lib/exo/inference/mlx/sharded_utils.py delete mode 100644 build/lib/exo/inference/mlx/test_sharded_llama.py delete mode 100644 build/lib/exo/inference/mlx/test_sharded_llava.py delete mode 100644 build/lib/exo/inference/mlx/test_sharded_model.py delete mode 100644 build/lib/exo/inference/pytorch/__init__.py delete mode 100644 build/lib/exo/inference/pytorch/helpers.py delete mode 100644 build/lib/exo/inference/pytorch/inference.py delete mode 100644 build/lib/exo/inference/pytorch/model/__init__.py delete mode 100644 build/lib/exo/inference/pytorch/model/hf.py delete mode 100644 build/lib/exo/inference/pytorch/model/utils.py delete mode 100644 build/lib/exo/inference/pytorch/test_inference_engine.py delete mode 100644 build/lib/exo/inference/shard.py delete mode 100644 build/lib/exo/inference/test_inference_engine.py delete mode 100644 build/lib/exo/inference/tokenizers.py delete mode 100644 build/lib/exo/models.py delete mode 100644 build/lib/exo/networking/__init__.py delete mode 100644 build/lib/exo/networking/discovery.py delete mode 100644 build/lib/exo/networking/grpc/__init__.py delete mode 100644 build/lib/exo/networking/grpc/grpc_discovery.py delete mode 100644 build/lib/exo/networking/grpc/grpc_peer_handle.py delete mode 100644 build/lib/exo/networking/grpc/grpc_server.py delete mode 100644 build/lib/exo/networking/grpc/node_service_pb2.py delete mode 100644 build/lib/exo/networking/grpc/node_service_pb2_grpc.py delete mode 100644 build/lib/exo/networking/grpc/test_grpc_discovery.py delete mode 100644 
build/lib/exo/networking/peer_handle.py delete mode 100644 build/lib/exo/networking/server.py delete mode 100644 build/lib/exo/orchestration/__init__.py delete mode 100644 build/lib/exo/orchestration/node.py delete mode 100644 build/lib/exo/orchestration/standard_node.py delete mode 100644 build/lib/exo/orchestration/test_node.py delete mode 100644 build/lib/exo/stats/__init__.py delete mode 100644 build/lib/exo/stats/metrics.py delete mode 100644 build/lib/exo/test_callbacks.py delete mode 100644 build/lib/exo/topology/__init__.py delete mode 100644 build/lib/exo/topology/device_capabilities.py delete mode 100644 build/lib/exo/topology/partitioning_strategy.py delete mode 100644 build/lib/exo/topology/ring_memory_weighted_partitioning_strategy.py delete mode 100644 build/lib/exo/topology/test_device_capabilities.py delete mode 100644 build/lib/exo/topology/test_map_partitions.py delete mode 100644 build/lib/exo/topology/test_ring_memory_weighted_partitioning_strategy.py delete mode 100644 build/lib/exo/topology/topology.py delete mode 100644 build/lib/exo/viz/__init__.py delete mode 100644 build/lib/exo/viz/test_topology_viz.py delete mode 100644 build/lib/exo/viz/topology_viz.py diff --git a/.gitignore b/.gitignore index 681562c3e..4a8414e5a 100644 --- a/.gitignore +++ b/.gitignore @@ -177,3 +177,4 @@ cython_debug/ exo/tinychat/images/*.png .vscode/ +build/ diff --git a/build/lib/exo/__init__.py b/build/lib/exo/__init__.py deleted file mode 100644 index e802d331b..000000000 --- a/build/lib/exo/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from exo.helpers import DEBUG as DEBUG, DEBUG_DISCOVERY as DEBUG_DISCOVERY, VERSION as VERSION diff --git a/build/lib/exo/api/__init__.py b/build/lib/exo/api/__init__.py deleted file mode 100644 index 660e75078..000000000 --- a/build/lib/exo/api/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from exo.api.chatgpt_api import ChatGPTAPI as ChatGPTAPI diff --git a/build/lib/exo/api/chatgpt_api.py b/build/lib/exo/api/chatgpt_api.py deleted file mode 100644 index 1abda85fe..000000000 --- a/build/lib/exo/api/chatgpt_api.py +++ /dev/null @@ -1,358 +0,0 @@ -import uuid -import time -import asyncio -import json -from pathlib import Path -from transformers import AutoTokenizer -from typing import List, Literal, Union, Dict -from aiohttp import web -import aiohttp_cors -import traceback -from exo import DEBUG, VERSION -from exo.helpers import PrefixDict -from exo.inference.shard import Shard -from exo.inference.tokenizers import resolve_tokenizer -from exo.orchestration import Node -from exo.models import model_base_shards -from typing import Callable - -class Message: - def __init__(self, role: str, content: Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]): - self.role = role - self.content = content - - def to_dict(self): - return {"role": self.role, "content": self.content} - - -class ChatCompletionRequest: - def __init__(self, model: str, messages: List[Message], temperature: float): - self.model = model - self.messages = messages - self.temperature = temperature - - def to_dict(self): - return {"model": self.model, "messages": [message.to_dict() for message in self.messages], "temperature": self.temperature} - - -def generate_completion( - chat_request: ChatCompletionRequest, - tokenizer, - prompt: str, - request_id: str, - tokens: List[int], - stream: bool, - finish_reason: Union[Literal["length", "stop"], None], - object_type: Literal["chat.completion", "text_completion"], -) -> dict: - completion = { - "id": f"chatcmpl-{request_id}", - "object": object_type, - 
"created": int(time.time()), - "model": chat_request.model, - "system_fingerprint": f"exo_{VERSION}", - "choices": [{ - "index": 0, - "message": {"role": "assistant", "content": tokenizer.decode(tokens)}, - "logprobs": None, - "finish_reason": finish_reason, - }], - } - - if not stream: - completion["usage"] = { - "prompt_tokens": len(tokenizer.encode(prompt)), - "completion_tokens": len(tokens), - "total_tokens": len(tokenizer.encode(prompt)) + len(tokens), - } - - choice = completion["choices"][0] - if object_type.startswith("chat.completion"): - key_name = "delta" if stream else "message" - choice[key_name] = {"role": "assistant", "content": tokenizer.decode(tokens)} - elif object_type == "text_completion": - choice["text"] = tokenizer.decode(tokens) - else: - ValueError(f"Unsupported response type: {object_type}") - - return completion - - -def remap_messages(messages: List[Message]) -> List[Message]: - remapped_messages = [] - last_image = None - for message in messages: - if not isinstance(message.content, list): - remapped_messages.append(message) - continue - - remapped_content = [] - for content in message.content: - if isinstance(content, dict): - if content.get("type") in ["image_url", "image"]: - image_url = content.get("image_url", {}).get("url") or content.get("image") - if image_url: - last_image = {"type": "image", "image": image_url} - remapped_content.append({"type": "text", "text": "[An image was uploaded but is not displayed here]"}) - else: - remapped_content.append(content) - else: - remapped_content.append(content) - remapped_messages.append(Message(role=message.role, content=remapped_content)) - - if last_image: - # Replace the last image placeholder with the actual image content - for message in reversed(remapped_messages): - for i, content in enumerate(message.content): - if isinstance(content, dict): - if content.get("type") == "text" and content.get("text") == "[An image was uploaded but is not displayed here]": - message.content[i] = last_image - return remapped_messages - - return remapped_messages - - -def build_prompt(tokenizer, _messages: List[Message]): - if len(_messages) == 1: - user_msg = _messages[0] - - # get instruct sys message - sys_msg = Message(role="system", content="You are a helpful assistant.") - - # restructure for sys_msg to go first - _messages = [sys_msg, user_msg] - - messages = remap_messages(_messages) - prompt = tokenizer.apply_chat_template( - messages, - tokenize=False, - add_generation_prompt=True - ) - - if DEBUG >= 3: - print(f"prompt: {str(prompt)}") - for msg in messages: - print(f"chat role: {msg.role}\ncontent: {msg.content}") - - image_str = None - for message in messages: - if not isinstance(message.content, list): - continue - - for content in message.content: - # note: we only support one image at a time right now. Multiple is possible. See: https://github.com/huggingface/transformers/blob/e68ec18ce224af879f22d904c7505a765fb77de3/docs/source/en/model_doc/llava.md?plain=1#L41 - # follows the convention in https://platform.openai.com/docs/guides/vision - if isinstance(content, dict) and content.get("type", None) == "image": - image_str = content.get("image", None) - break - - return prompt, image_str - - -def parse_message(data: dict): - if "role" not in data or "content" not in data: - raise ValueError(f"Invalid message: {data}. 
Must have 'role' and 'content'") - return Message(data["role"], data["content"]) - - -def parse_chat_request(data: dict): - return ChatCompletionRequest( - data.get("model", "llama-3.1-8b"), - [parse_message(msg) for msg in data["messages"]], - data.get("temperature", 0.0), - ) - - -class PromptSession: - def __init__(self, request_id: str, timestamp: int, prompt: str): - self.request_id = request_id - self.timestamp = timestamp - self.prompt = prompt - - -class ChatGPTAPI: - def __init__(self, node: Node, inference_engine_classname: str, response_timeout_secs: int = 90, on_chat_completion_request: Callable[[str, ChatCompletionRequest, str], None] = None): - self.node = node - self.inference_engine_classname = inference_engine_classname - self.response_timeout_secs = response_timeout_secs - self.on_chat_completion_request = on_chat_completion_request - self.app = web.Application(client_max_size=100*1024*1024) # 100MB to support image upload - self.prompts: PrefixDict[str, PromptSession] = PrefixDict() - self.prev_token_lens: Dict[str, int] = {} - self.stream_tasks: Dict[str, asyncio.Task] = {} - cors = aiohttp_cors.setup(self.app) - cors_options = aiohttp_cors.ResourceOptions( - allow_credentials=True, - expose_headers="*", - allow_headers="*", - allow_methods="*", - ) - cors.add(self.app.router.add_get("/models", self.handle_get_models), {"*": cors_options}) - cors.add(self.app.router.add_get("/v1/models", self.handle_get_models), {"*": cors_options}) - cors.add(self.app.router.add_post("/chat/token/encode", self.handle_post_chat_token_encode), {"*": cors_options}) - cors.add(self.app.router.add_post("/v1/chat/token/encode", self.handle_post_chat_token_encode), {"*": cors_options}) - cors.add(self.app.router.add_post("/chat/completions", self.handle_post_chat_completions), {"*": cors_options}) - cors.add(self.app.router.add_post("/v1/chat/completions", self.handle_post_chat_completions), {"*": cors_options}) - - self.static_dir = Path(__file__).parent.parent.parent/"tinychat/examples/tinychat" - self.app.router.add_get("/", self.handle_root) - self.app.router.add_static("/", self.static_dir, name="static") - - # Add middleware to log every request - self.app.middlewares.append(self.log_request) - - async def log_request(self, app, handler): - async def middleware(request): - if DEBUG >= 2: print(f"Received request: {request.method} {request.path}") - return await handler(request) - - return middleware - - async def handle_root(self, request): - return web.FileResponse(self.static_dir/"index.html") - - async def handle_get_models(self, request): - return web.json_response([{"id": model_name, "object": "model", "owned_by": "exo", "ready": True } for model_name, _ in model_base_shards.items()]) - - async def handle_post_chat_token_encode(self, request): - data = await request.json() - shard = model_base_shards.get(data.get("model", "llama-3.1-8b"), {}).get(self.inference_engine_classname) - messages = [parse_message(msg) for msg in data.get("messages", [])] - tokenizer = await resolve_tokenizer(shard.model_id) - return web.json_response({"length": len(build_prompt(tokenizer, messages)[0])}) - - async def handle_post_chat_completions(self, request): - data = await request.json() - if DEBUG >= 2: print(f"Handling chat completions request from {request.remote}: {data}") - stream = data.get("stream", False) - chat_request = parse_chat_request(data) - if chat_request.model and chat_request.model.startswith("gpt-"): # to be compatible with ChatGPT tools, point all gpt- model requests to llama 
instead - chat_request.model = "llama-3.1-8b" - if not chat_request.model or chat_request.model not in model_base_shards: - if DEBUG >= 1: print(f"Invalid model: {chat_request.model}. Supported: {list(model_base_shards.keys())}. Defaulting to llama-3.1-8b") - chat_request.model = "llama-3.1-8b" - shard = model_base_shards[chat_request.model].get(self.inference_engine_classname, None) - if not shard: - supported_models = [model for model, engines in model_base_shards.items() if self.inference_engine_classname in engines] - return web.json_response( - {"detail": f"Unsupported model: {chat_request.model} with inference engine {self.inference_engine_classname}. Supported models for this engine: {supported_models}"}, - status=400, - ) - - tokenizer = await resolve_tokenizer(shard.model_id) - if DEBUG >= 4: print(f"Resolved tokenizer: {tokenizer}") - - prompt, image_str = build_prompt(tokenizer, chat_request.messages) - request_id = str(uuid.uuid4()) - if self.on_chat_completion_request: - try: - self.on_chat_completion_request(request_id, chat_request, prompt) - except Exception as e: - if DEBUG >= 2: traceback.print_exc() - # request_id = None - # match = self.prompts.find_longest_prefix(prompt) - # if match and len(prompt) > len(match[1].prompt): - # if DEBUG >= 2: - # print(f"Prompt for request starts with previous prompt {len(match[1].prompt)} of {len(prompt)}: {match[1].prompt}") - # request_id = match[1].request_id - # self.prompts.add(prompt, PromptSession(request_id=request_id, timestamp=int(time.time()), prompt=prompt)) - # # remove the matching prefix from the prompt - # prompt = prompt[len(match[1].prompt):] - # else: - # request_id = str(uuid.uuid4()) - # self.prompts.add(prompt, PromptSession(request_id=request_id, timestamp=int(time.time()), prompt=prompt)) - - callback_id = f"chatgpt-api-wait-response-{request_id}" - callback = self.node.on_token.register(callback_id) - - if DEBUG >= 2: print(f"Sending prompt from ChatGPT api {request_id=} {shard=} {prompt=} {image_str=}") - try: - await self.node.process_prompt(shard, prompt, image_str, request_id=request_id) - except Exception as e: - if DEBUG >= 2: traceback.print_exc() - return web.json_response({"detail": f"Error processing prompt (see logs with DEBUG>=2): {str(e)}"}, status=500) - - try: - if DEBUG >= 2: print(f"Waiting for response to finish. 
timeout={self.response_timeout_secs}s") - - if stream: - response = web.StreamResponse( - status=200, - reason="OK", - headers={ - "Content-Type": "text/event-stream", - "Cache-Control": "no-cache", - }, - ) - await response.prepare(request) - - async def stream_result(request_id: str, tokens: List[int], is_finished: bool): - prev_last_tokens_len = self.prev_token_lens.get(request_id, 0) - self.prev_token_lens[request_id] = max(prev_last_tokens_len, len(tokens)) - new_tokens = tokens[prev_last_tokens_len:] - finish_reason = None - eos_token_id = tokenizer.special_tokens_map.get("eos_token_id") if hasattr(tokenizer, "_tokenizer") and isinstance(tokenizer._tokenizer, - AutoTokenizer) else getattr(tokenizer, "eos_token_id", None) - if len(new_tokens) > 0 and new_tokens[-1] == eos_token_id: - new_tokens = new_tokens[:-1] - if is_finished: - finish_reason = "stop" - if is_finished and not finish_reason: - finish_reason = "length" - - completion = generate_completion( - chat_request, - tokenizer, - prompt, - request_id, - new_tokens, - stream, - finish_reason, - "chat.completion", - ) - if DEBUG >= 2: print(f"Streaming completion: {completion}") - try: - await response.write(f"data: {json.dumps(completion)}\n\n".encode()) - except Exception as e: - if DEBUG >= 2: print(f"Error streaming completion: {e}") - if DEBUG >= 2: traceback.print_exc() - - def on_result(_request_id: str, tokens: List[int], is_finished: bool): - self.stream_tasks[request_id] = asyncio.create_task(stream_result(request_id, tokens, is_finished)) - - return _request_id == request_id and is_finished - - _, tokens, _ = await callback.wait(on_result, timeout=self.response_timeout_secs) - if request_id in self.stream_tasks: # in case there is still a stream task running, wait for it to complete - if DEBUG >= 2: print("Pending stream task. Waiting for stream task to complete.") - try: - await asyncio.wait_for(self.stream_tasks[request_id], timeout=30) - except asyncio.TimeoutError: - print("WARNING: Stream task timed out. 
This should not happen.") - await response.write_eof() - return response - else: - _, tokens, _ = await callback.wait( - lambda _request_id, tokens, is_finished: _request_id == request_id and is_finished, - timeout=self.response_timeout_secs, - ) - - finish_reason = "length" - eos_token_id = tokenizer.special_tokens_map.get("eos_token_id") if isinstance(getattr(tokenizer, "_tokenizer", None), AutoTokenizer) else tokenizer.eos_token_id - if DEBUG >= 2: print(f"Checking if end of tokens result {tokens[-1]=} is {eos_token_id=}") - if tokens[-1] == eos_token_id: - tokens = tokens[:-1] - finish_reason = "stop" - - return web.json_response(generate_completion(chat_request, tokenizer, prompt, request_id, tokens, stream, finish_reason, "chat.completion")) - except asyncio.TimeoutError: - return web.json_response({"detail": "Response generation timed out"}, status=408) - finally: - deregistered_callback = self.node.on_token.deregister(callback_id) - if DEBUG >= 2: print(f"Deregister {callback_id=} {deregistered_callback=}") - - async def run(self, host: str = "0.0.0.0", port: int = 8000): - runner = web.AppRunner(self.app) - await runner.setup() - site = web.TCPSite(runner, host, port) - await site.start() diff --git a/build/lib/exo/download/__init__.py b/build/lib/exo/download/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/build/lib/exo/download/download_progress.py b/build/lib/exo/download/download_progress.py deleted file mode 100644 index 779e53287..000000000 --- a/build/lib/exo/download/download_progress.py +++ /dev/null @@ -1,61 +0,0 @@ -from typing import Dict, Callable, Coroutine, Any, Literal -from dataclasses import dataclass -from datetime import timedelta - - -@dataclass -class RepoFileProgressEvent: - repo_id: str - repo_revision: str - file_path: str - downloaded: int - downloaded_this_session: int - total: int - speed: int - eta: timedelta - status: Literal["not_started", "in_progress", "complete"] - - def to_dict(self): - return { - "repo_id": self.repo_id, "repo_revision": self.repo_revision, "file_path": self.file_path, "downloaded": self.downloaded, "downloaded_this_session": self.downloaded_this_session, - "total": self.total, "speed": self.speed, "eta": self.eta.total_seconds(), "status": self.status - } - - @classmethod - def from_dict(cls, data): - if 'eta' in data: data['eta'] = timedelta(seconds=data['eta']) - return cls(**data) - - -@dataclass -class RepoProgressEvent: - repo_id: str - repo_revision: str - completed_files: int - total_files: int - downloaded_bytes: int - downloaded_bytes_this_session: int - total_bytes: int - overall_speed: int - overall_eta: timedelta - file_progress: Dict[str, RepoFileProgressEvent] - status: Literal["not_started", "in_progress", "complete"] - - def to_dict(self): - return { - "repo_id": self.repo_id, "repo_revision": self.repo_revision, "completed_files": self.completed_files, "total_files": self.total_files, "downloaded_bytes": self.downloaded_bytes, - "downloaded_bytes_this_session": self.downloaded_bytes_this_session, "total_bytes": self.total_bytes, "overall_speed": self.overall_speed, "overall_eta": self.overall_eta.total_seconds(), - "file_progress": {k: v.to_dict() - for k, v in self.file_progress.items()}, "status": self.status - } - - @classmethod - def from_dict(cls, data): - if 'overall_eta' in data: data['overall_eta'] = timedelta(seconds=data['overall_eta']) - if 'file_progress' in data: data['file_progress'] = {k: RepoFileProgressEvent.from_dict(v) for k, v in data['file_progress'].items()} - 
- return cls(**data) - - -RepoFileProgressCallback = Callable[[RepoFileProgressEvent], Coroutine[Any, Any, None]] -RepoProgressCallback = Callable[[RepoProgressEvent], Coroutine[Any, Any, None]] diff --git a/build/lib/exo/download/hf/__init__.py b/build/lib/exo/download/hf/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/build/lib/exo/download/hf/hf_helpers.py b/build/lib/exo/download/hf/hf_helpers.py deleted file mode 100644 index 8fd96dc5f..000000000 --- a/build/lib/exo/download/hf/hf_helpers.py +++ /dev/null @@ -1,403 +0,0 @@ -import asyncio -import aiohttp -import json -import os -from urllib.parse import urljoin -from typing import Callable, Optional, Coroutine, Any, Dict, List, Union, Literal -from datetime import datetime, timedelta -from fnmatch import fnmatch -from pathlib import Path -from typing import Generator, Iterable, TypeVar, TypedDict -from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type -from exo.helpers import DEBUG -from exo.download.download_progress import RepoProgressEvent, RepoFileProgressEvent, RepoProgressCallback, RepoFileProgressCallback -from exo.inference.shard import Shard -import aiofiles -from aiofiles import os as aios - -T = TypeVar("T") - -async def get_local_snapshot_dir(repo_id: str, revision: str = "main") -> Optional[Path]: - refs_dir = get_repo_root(repo_id)/"refs" - refs_file = refs_dir/revision - if await aios.path.exists(refs_file): - async with aiofiles.open(refs_file, 'r') as f: - commit_hash = (await f.read()).strip() - snapshot_dir = get_repo_root(repo_id)/"snapshots"/commit_hash - return snapshot_dir - return None - - -def filter_repo_objects( - items: Iterable[T], - *, - allow_patterns: Optional[Union[List[str], str]] = None, - ignore_patterns: Optional[Union[List[str], str]] = None, - key: Optional[Callable[[T], str]] = None, -) -> Generator[T, None, None]: - if isinstance(allow_patterns, str): - allow_patterns = [allow_patterns] - if isinstance(ignore_patterns, str): - ignore_patterns = [ignore_patterns] - if allow_patterns is not None: - allow_patterns = [_add_wildcard_to_directories(p) for p in allow_patterns] - if ignore_patterns is not None: - ignore_patterns = [_add_wildcard_to_directories(p) for p in ignore_patterns] - - if key is None: - - def _identity(item: T) -> str: - if isinstance(item, str): - return item - if isinstance(item, Path): - return str(item) - raise ValueError(f"Please provide `key` argument in `filter_repo_objects`: `{item}` is not a string.") - - key = _identity - - for item in items: - path = key(item) - if allow_patterns is not None and not any(fnmatch(path, r) for r in allow_patterns): - continue - if ignore_patterns is not None and any(fnmatch(path, r) for r in ignore_patterns): - continue - yield item - - -def _add_wildcard_to_directories(pattern: str) -> str: - if pattern[-1] == "/": - return pattern + "*" - return pattern - - -def get_hf_home() -> Path: - """Get the Hugging Face home directory.""" - return Path(os.environ.get("HF_HOME", Path.home()/".cache"/"huggingface")) - - -async def get_hf_token(): - """Retrieve the Hugging Face token from the user's HF_HOME directory.""" - token_path = get_hf_home()/"token" - if await aios.path.exists(token_path): - async with aiofiles.open(token_path, 'r') as f: - return (await f.read()).strip() - return None - - -async def get_auth_headers(): - """Get authentication headers if a token is available.""" - token = await get_hf_token() - if token: - return {"Authorization": f"Bearer {token}"} - return {} 
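
The cache-layout and auth helpers above are small enough to exercise on their own. Below is a minimal standalone sketch, assuming only aiohttp and the same HF_HOME cache conventions and revision-info endpoint used elsewhere in this patch; the names repo_root_for and fetch_revision_info, and the example repo id, are illustrative and not part of the patch itself.

import asyncio
import os
from pathlib import Path
from typing import Optional

import aiohttp


def repo_root_for(repo_id: str) -> Path:
    # Same layout the helpers above target: $HF_HOME/hub/models--<org>--<name>
    hf_home = Path(os.environ.get("HF_HOME", Path.home() / ".cache" / "huggingface"))
    return hf_home / "hub" / f"models--{repo_id.replace('/', '--')}"


async def fetch_revision_info(repo_id: str, revision: str = "main", token: Optional[str] = None) -> dict:
    # Same pattern as the auth helper above: only attach Authorization when a token exists.
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    url = f"https://huggingface.co/api/models/{repo_id}/revision/{revision}"
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            resp.raise_for_status()
            return await resp.json()


if __name__ == "__main__":
    print(repo_root_for("mlx-community/Meta-Llama-3-8B-Instruct"))
    info = asyncio.run(fetch_revision_info("mlx-community/Meta-Llama-3-8B-Instruct"))
    print(info.get("sha"))
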
- - -def get_repo_root(repo_id: str) -> Path: - """Get the root directory for a given repo ID in the Hugging Face cache.""" - sanitized_repo_id = repo_id.replace("/", "--") - return get_hf_home()/"hub"/f"models--{sanitized_repo_id}" - - -async def fetch_file_list(session, repo_id, revision, path=""): - api_url = f"https://huggingface.co/api/models/{repo_id}/tree/{revision}" - url = f"{api_url}/{path}" if path else api_url - - headers = await get_auth_headers() - async with session.get(url, headers=headers) as response: - if response.status == 200: - data = await response.json() - files = [] - for item in data: - if item["type"] == "file": - files.append({"path": item["path"], "size": item["size"]}) - elif item["type"] == "directory": - subfiles = await fetch_file_list(session, repo_id, revision, item["path"]) - files.extend(subfiles) - return files - else: - raise Exception(f"Failed to fetch file list: {response.status}") - - -@retry( - stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=4, max=60), retry=retry_if_exception_type((aiohttp.ClientError, asyncio.TimeoutError, aiohttp.ClientResponseError)), reraise=True -) -async def download_file( - session: aiohttp.ClientSession, repo_id: str, revision: str, file_path: str, save_directory: str, progress_callback: Optional[RepoFileProgressCallback] = None, use_range_request: bool = True -): - base_url = f"https://huggingface.co/{repo_id}/resolve/{revision}/" - url = urljoin(base_url, file_path) - local_path = os.path.join(save_directory, file_path) - - await aios.makedirs(os.path.dirname(local_path), exist_ok=True) - - # Check if file already exists and get its size - local_file_size = await aios.path.getsize(local_path) if await aios.path.exists(local_path) else 0 - - headers = await get_auth_headers() - if use_range_request: - headers["Range"] = f"bytes={local_file_size}-" - - async with session.get(url, headers=headers) as response: - total_size = int(response.headers.get('Content-Length', 0)) - downloaded_size = local_file_size - downloaded_this_session = 0 - mode = 'ab' if use_range_request else 'wb' - if downloaded_size == total_size: - if DEBUG >= 2: print(f"File already downloaded: {file_path}") - if progress_callback: - await progress_callback(RepoFileProgressEvent(repo_id, revision, file_path, downloaded_size, downloaded_this_session, total_size, 0, timedelta(0), "complete")) - return - - if response.status == 200: - # File doesn't support range requests or we're not using them, start from beginning - mode = 'wb' - downloaded_size = 0 - elif response.status == 206: - # Partial content, resume download - content_range = response.headers.get('Content-Range', '') - try: - total_size = int(content_range.split('/')[-1]) - except ValueError: - if DEBUG >= 1: print(f"Failed to parse Content-Range header: {content_range}. 
Starting download from scratch...") - return await download_file(session, repo_id, revision, file_path, save_directory, progress_callback, use_range_request=False) - elif response.status == 416: - # Range not satisfiable, get the actual file size - content_range = response.headers.get('Content-Range', '') - try: - total_size = int(content_range.split('/')[-1]) - if downloaded_size == total_size: - if DEBUG >= 2: print(f"File fully downloaded on first pass: {file_path}") - if progress_callback: - await progress_callback(RepoFileProgressEvent(repo_id, revision, file_path, downloaded_size, downloaded_this_session, total_size, 0, timedelta(0), "complete")) - return - except ValueError: - if DEBUG >= 1: print(f"Failed to parse Content-Range header: {content_range}. Starting download from scratch...") - return await download_file(session, repo_id, revision, file_path, save_directory, progress_callback, use_range_request=False) - else: - raise aiohttp.ClientResponseError(response.request_info, response.history, status=response.status, message=f"Failed to download {file_path}: {response.status}") - - if downloaded_size == total_size: - print(f"File already downloaded: {file_path}") - if progress_callback: - await progress_callback(RepoFileProgressEvent(repo_id, revision, file_path, downloaded_size, downloaded_this_session, total_size, 0, timedelta(0), "complete")) - return - - DOWNLOAD_CHUNK_SIZE = 32768 - start_time = datetime.now() - async with aiofiles.open(local_path, mode) as f: - async for chunk in response.content.iter_chunked(DOWNLOAD_CHUNK_SIZE): - await f.write(chunk) - downloaded_size += len(chunk) - downloaded_this_session += len(chunk) - if progress_callback and total_size: - elapsed_time = (datetime.now() - start_time).total_seconds() - speed = int(downloaded_this_session/elapsed_time) if elapsed_time > 0 else 0 - remaining_size = total_size - downloaded_size - eta = timedelta(seconds=remaining_size/speed) if speed > 0 else timedelta(0) - status = "in_progress" if downloaded_size < total_size else "complete" - if DEBUG >= 8: print(f"HF repo file download progress: {file_path=} {elapsed_time=} {speed=} Downloaded={downloaded_size}/{total_size} {remaining_size=} {eta=} {status=}") - await progress_callback(RepoFileProgressEvent(repo_id, revision, file_path, downloaded_size, downloaded_this_session, total_size, speed, eta, status)) - if DEBUG >= 2: print(f"Downloaded: {file_path}") - - -async def download_repo_files( - repo_id: str, - revision: str = "main", - progress_callback: Optional[RepoProgressCallback] = None, - allow_patterns: Optional[Union[List[str], str]] = None, - ignore_patterns: Optional[Union[List[str], str]] = None, - max_parallel_downloads: int = 4 -) -> Path: - repo_root = get_repo_root(repo_id) - refs_dir = repo_root/"refs" - snapshots_dir = repo_root/"snapshots" - cachedreqs_dir = repo_root/"cachedreqs" - - # Ensure directories exist - await aios.makedirs(refs_dir, exist_ok=True) - await aios.makedirs(snapshots_dir, exist_ok=True) - await aios.makedirs(cachedreqs_dir, exist_ok=True) - - # Check if we have a cached commit hash - refs_file = refs_dir/revision - if await aios.path.exists(refs_file): - async with aiofiles.open(refs_file, 'r') as f: - commit_hash = (await f.read()).strip() - if DEBUG >= 2: print(f"Commit hash is already hashed at {refs_file}: {commit_hash}") - else: - async with aiohttp.ClientSession() as session: - # Fetch the commit hash for the given revision - api_url = f"https://huggingface.co/api/models/{repo_id}/revision/{revision}" - headers = 
await get_auth_headers() - async with session.get(api_url, headers=headers) as response: - if response.status != 200: - raise Exception(f"Failed to fetch revision info from {api_url}: {response.status}") - revision_info = await response.json() - commit_hash = revision_info['sha'] - - # Cache the commit hash - async with aiofiles.open(refs_file, 'w') as f: - await f.write(commit_hash) - - # Set up the snapshot directory - snapshot_dir = snapshots_dir/commit_hash - await aios.makedirs(snapshot_dir, exist_ok=True) - - # Set up the cached file list directory - cached_file_list_dir = cachedreqs_dir/commit_hash - await aios.makedirs(cached_file_list_dir, exist_ok=True) - cached_file_list_path = cached_file_list_dir/"fetch_file_list.json" - - async with aiohttp.ClientSession() as session: - # Check if we have a cached file list - if await aios.path.exists(cached_file_list_path): - async with aiofiles.open(cached_file_list_path, 'r') as f: - file_list = json.loads(await f.read()) - if DEBUG >= 2: print(f"Using cached file list from {cached_file_list_path}") - else: - file_list = await fetch_file_list(session, repo_id, revision) - # Cache the file list - async with aiofiles.open(cached_file_list_path, 'w') as f: - await f.write(json.dumps(file_list)) - if DEBUG >= 2: print(f"Cached file list at {cached_file_list_path}") - - filtered_file_list = list(filter_repo_objects(file_list, allow_patterns=allow_patterns, ignore_patterns=ignore_patterns, key=lambda x: x["path"])) - total_files = len(filtered_file_list) - total_bytes = sum(file["size"] for file in filtered_file_list) - file_progress: Dict[str, RepoFileProgressEvent] = { - file["path"]: RepoFileProgressEvent(repo_id, revision, file["path"], 0, 0, file["size"], 0, timedelta(0), "not_started") - for file in filtered_file_list - } - start_time = datetime.now() - - async def download_with_progress(file_info, progress_state): - local_path = snapshot_dir/file_info["path"] - if await aios.path.exists(local_path) and (await aios.stat(local_path)).st_size == file_info["size"]: - if DEBUG >= 2: print(f"File already fully downloaded: {file_info['path']}") - progress_state['completed_files'] += 1 - progress_state['downloaded_bytes'] += file_info["size"] - file_progress[file_info["path"]] = RepoFileProgressEvent(repo_id, revision, file_info["path"], file_info["size"], 0, file_info["size"], 0, timedelta(0), "complete") - if progress_callback: - elapsed_time = (datetime.now() - start_time).total_seconds() - overall_speed = int(progress_state['downloaded_bytes_this_session']/elapsed_time) if elapsed_time > 0 else 0 - remaining_bytes = total_bytes - progress_state['downloaded_bytes'] - overall_eta = timedelta(seconds=remaining_bytes/overall_speed) if overall_speed > 0 else timedelta(seconds=0) - status = "in_progress" if progress_state['completed_files'] < total_files else "complete" - await progress_callback( - RepoProgressEvent( - repo_id, revision, progress_state['completed_files'], total_files, progress_state['downloaded_bytes'], progress_state['downloaded_bytes_this_session'], total_bytes, overall_speed, - overall_eta, file_progress, status - ) - ) - return - - async def file_progress_callback(event: RepoFileProgressEvent): - progress_state['downloaded_bytes'] += event.downloaded - file_progress[event.file_path].downloaded - progress_state['downloaded_bytes_this_session'] += event.downloaded_this_session - file_progress[event.file_path].downloaded_this_session - file_progress[event.file_path] = event - if progress_callback: - elapsed_time = (datetime.now() 
- start_time).total_seconds() - overall_speed = int(progress_state['downloaded_bytes_this_session']/elapsed_time) if elapsed_time > 0 else 0 - remaining_bytes = total_bytes - progress_state['downloaded_bytes'] - overall_eta = timedelta(seconds=remaining_bytes/overall_speed) if overall_speed > 0 else timedelta(seconds=0) - status = "in_progress" if progress_state['downloaded_bytes'] < total_bytes else "complete" - await progress_callback( - RepoProgressEvent( - repo_id, revision, progress_state['completed_files'], total_files, progress_state['downloaded_bytes'], progress_state['downloaded_bytes_this_session'], total_bytes, overall_speed, - overall_eta, file_progress, status - ) - ) - - await download_file(session, repo_id, revision, file_info["path"], snapshot_dir, file_progress_callback) - progress_state['completed_files'] += 1 - file_progress[ - file_info["path"] - ] = RepoFileProgressEvent(repo_id, revision, file_info["path"], file_info["size"], file_progress[file_info["path"]].downloaded_this_session, file_info["size"], 0, timedelta(0), "complete") - if progress_callback: - elapsed_time = (datetime.now() - start_time).total_seconds() - overall_speed = int(progress_state['downloaded_bytes_this_session']/elapsed_time) if elapsed_time > 0 else 0 - remaining_bytes = total_bytes - progress_state['downloaded_bytes'] - overall_eta = timedelta(seconds=remaining_bytes/overall_speed) if overall_speed > 0 else timedelta(seconds=0) - status = "in_progress" if progress_state['completed_files'] < total_files else "complete" - await progress_callback( - RepoProgressEvent( - repo_id, revision, progress_state['completed_files'], total_files, progress_state['downloaded_bytes'], progress_state['downloaded_bytes_this_session'], total_bytes, overall_speed, - overall_eta, file_progress, status - ) - ) - - progress_state = {'completed_files': 0, 'downloaded_bytes': 0, 'downloaded_bytes_this_session': 0} - - semaphore = asyncio.Semaphore(max_parallel_downloads) - - async def download_with_semaphore(file_info): - async with semaphore: - await download_with_progress(file_info, progress_state) - - tasks = [asyncio.create_task(download_with_semaphore(file_info)) for file_info in filtered_file_list] - await asyncio.gather(*tasks) - - return snapshot_dir - - -async def get_weight_map(repo_id: str, revision: str = "main") -> Optional[Dict[str, str]]: - """ - Retrieve the weight map from the model.safetensors.index.json file. - - Args: - repo_id (str): The Hugging Face repository ID. - revision (str): The revision of the repository to use. - - Returns: - Optional[Dict[str, str]]: The weight map if it exists, otherwise None. 
- """ - - # Download the index file - await download_repo_files(repo_id=repo_id, revision=revision, allow_patterns="model.safetensors.index.json") - - # Check if the file exists - repo_root = get_repo_root(repo_id) - snapshot_dir = repo_root/"snapshots" - index_file = next((f for f in await aios.listdir(snapshot_dir) if f.endswith("model.safetensors.index.json")), None) - - if index_file: - index_file_path = snapshot_dir/index_file - if await aios.path.exists(index_file_path): - async with aiofiles.open(index_file_path, 'r') as f: - index_data = json.loads(await f.read()) - return index_data.get("weight_map") - - return None - - -def extract_layer_num(tensor_name: str) -> Optional[int]: - # This is a simple example and might need to be adjusted based on the actual naming convention - parts = tensor_name.split('.') - for part in parts: - if part.isdigit(): - return int(part) - return None - - -def get_allow_patterns(weight_map: Dict[str, str], shard: Shard) -> List[str]: - default_patterns = [ - "*.json", - "*.py", - "tokenizer.model", - "*.tiktoken", - "*.txt", - ] - shard_specific_patterns = [] - if weight_map: - for tensor_name, filename in weight_map.items(): - layer_num = extract_layer_num(tensor_name) - if layer_num is not None and shard.start_layer <= layer_num <= shard.end_layer: - shard_specific_patterns.append(filename) - sorted_file_names = sorted(weight_map.values()) - if shard.is_first_layer(): - shard_specific_patterns.append(sorted_file_names[0]) - elif shard.is_last_layer(): - shard_specific_patterns.append(sorted_file_names[-1]) - else: - shard_specific_patterns = ["*.safetensors"] - return list(set(default_patterns + shard_specific_patterns)) # Remove duplicates diff --git a/build/lib/exo/download/hf/hf_shard_download.py b/build/lib/exo/download/hf/hf_shard_download.py deleted file mode 100644 index eb562c3c9..000000000 --- a/build/lib/exo/download/hf/hf_shard_download.py +++ /dev/null @@ -1,77 +0,0 @@ -import asyncio -import traceback -from pathlib import Path -from typing import Dict, List, Tuple -from exo.inference.shard import Shard -from exo.download.shard_download import ShardDownloader -from exo.download.download_progress import RepoProgressEvent -from exo.download.hf.hf_helpers import download_repo_files, RepoProgressEvent, get_weight_map, get_allow_patterns, get_repo_root -from exo.helpers import AsyncCallbackSystem, DEBUG - - -class HFShardDownloader(ShardDownloader): - def __init__(self, quick_check: bool = False, max_parallel_downloads: int = 4): - self.quick_check = quick_check - self.max_parallel_downloads = max_parallel_downloads - self.active_downloads: Dict[Shard, asyncio.Task] = {} - self.completed_downloads: Dict[Shard, Path] = {} - self._on_progress = AsyncCallbackSystem[str, Tuple[Shard, RepoProgressEvent]]() - - async def ensure_shard(self, shard: Shard) -> Path: - if shard in self.completed_downloads: - return self.completed_downloads[shard] - if self.quick_check: - repo_root = get_repo_root(shard.model_id) - snapshots_dir = repo_root/"snapshots" - if snapshots_dir.exists(): - visible_dirs = [d for d in snapshots_dir.iterdir() if not d.name.startswith('.')] - if visible_dirs: - most_recent_dir = max(visible_dirs, key=lambda x: x.stat().st_mtime) - return most_recent_dir - - # If a download on this shard is already in progress, keep that one - for active_shard in self.active_downloads: - if active_shard == shard: - if DEBUG >= 2: print(f"Download already in progress for {shard}. 
Keeping that one.") - return await self.active_downloads[shard] - - # Cancel any downloads for this model_id on a different shard - existing_active_shards = [active_shard for active_shard in self.active_downloads.keys() if active_shard.model_id == shard.model_id] - for active_shard in existing_active_shards: - if DEBUG >= 2: print(f"Cancelling download for {active_shard} (replacing with {shard})") - task = self.active_downloads[active_shard] - task.cancel() - try: - await task - except asyncio.CancelledError: - pass # This is expected when cancelling a task - except Exception as e: - if DEBUG >= 2: print(f"Error in cancelling download {active_shard}: {e}") - traceback.print_exc() - self.active_downloads = {active_shard: task for active_shard, task in self.active_downloads.items() if active_shard.model_id != shard.model_id} - - # Start new download - download_task = asyncio.create_task(self._download_shard(shard)) - self.active_downloads[shard] = download_task - try: - path = await download_task - self.completed_downloads[shard] = path - return path - finally: - # Ensure the task is removed even if an exception occurs - print(f"Removing download task for {shard}: {shard in self.active_downloads}") - if shard in self.active_downloads: - self.active_downloads.pop(shard) - - async def _download_shard(self, shard: Shard) -> Path: - async def wrapped_progress_callback(event: RepoProgressEvent): - self._on_progress.trigger_all(shard, event) - - weight_map = await get_weight_map(shard.model_id) - allow_patterns = get_allow_patterns(weight_map, shard) - - return await download_repo_files(repo_id=shard.model_id, progress_callback=wrapped_progress_callback, allow_patterns=allow_patterns, max_parallel_downloads=self.max_parallel_downloads) - - @property - def on_progress(self) -> AsyncCallbackSystem[str, Tuple[Shard, RepoProgressEvent]]: - return self._on_progress diff --git a/build/lib/exo/download/shard_download.py b/build/lib/exo/download/shard_download.py deleted file mode 100644 index 771fb8683..000000000 --- a/build/lib/exo/download/shard_download.py +++ /dev/null @@ -1,26 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Optional, Tuple -from pathlib import Path -from exo.inference.shard import Shard -from exo.download.download_progress import RepoProgressEvent -from exo.helpers import AsyncCallbackSystem - - -class ShardDownloader(ABC): - @abstractmethod - async def ensure_shard(self, shard: Shard) -> Path: - """ - Ensures that the shard is downloaded. - Does not allow multiple overlapping downloads at once. - If you try to download a Shard which overlaps a Shard that is already being downloaded, - the download will be cancelled and a new download will start. - - Args: - shard (Shard): The shard to download. 
- """ - pass - - @property - @abstractmethod - def on_progress(self) -> AsyncCallbackSystem[str, Tuple[Shard, RepoProgressEvent]]: - pass diff --git a/build/lib/exo/helpers.py b/build/lib/exo/helpers.py deleted file mode 100644 index d8a5c6cc2..000000000 --- a/build/lib/exo/helpers.py +++ /dev/null @@ -1,234 +0,0 @@ -import os -import asyncio -from typing import Callable, TypeVar, Optional, Dict, Generic, Tuple, List -import socket -import random -import platform -import psutil -import uuid -import netifaces -from pathlib import Path - -DEBUG = int(os.getenv("DEBUG", default="0")) -DEBUG_DISCOVERY = int(os.getenv("DEBUG_DISCOVERY", default="0")) -VERSION = "0.0.1" - -exo_text = r""" - _____ _____ - / _ \ \/ / _ \ -| __/> < (_) | - \___/_/\_\___/ - """ - - -def get_system_info(): - if psutil.MACOS: - if platform.machine() == "arm64": - return "Apple Silicon Mac" - if platform.machine() in ["x86_64", "i386"]: - return "Intel Mac" - return "Unknown Mac architecture" - if psutil.LINUX: - return "Linux" - return "Non-Mac, non-Linux system" - -def find_available_port(host: str = "", min_port: int = 49152, max_port: int = 65535) -> int: - used_ports_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".exo_used_ports") - - def read_used_ports(): - if os.path.exists(used_ports_file): - with open(used_ports_file, "r") as f: - return [int(line.strip()) for line in f if line.strip().isdigit()] - return [] - - def write_used_port(port, used_ports): - with open(used_ports_file, "w") as f: - print(used_ports[-19:]) - for p in used_ports[-19:] + [port]: - f.write(f"{p}\n") - - used_ports = read_used_ports() - available_ports = set(range(min_port, max_port + 1)) - set(used_ports) - - while available_ports: - port = random.choice(list(available_ports)) - if DEBUG >= 2: print(f"Trying to find available port {port=}") - try: - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind((host, port)) - write_used_port(port, used_ports) - return port - except socket.error: - available_ports.remove(port) - - raise RuntimeError("No available ports in the specified range") - - -def print_exo(): - print(exo_text) - - -def print_yellow_exo(): - yellow = "\033[93m" # ANSI escape code for yellow - reset = "\033[0m" # ANSI escape code to reset color - print(f"{yellow}{exo_text}{reset}") - - -def terminal_link(uri, label=None): - if label is None: - label = uri - parameters = "" - - # OSC 8 ; params ; URI ST OSC 8 ;; ST - escape_mask = "\033]8;{};{}\033\\{}\033]8;;\033\\" - - return escape_mask.format(parameters, uri, label) - - -T = TypeVar("T") -K = TypeVar("K") - - -class AsyncCallback(Generic[T]): - def __init__(self) -> None: - self.condition: asyncio.Condition = asyncio.Condition() - self.result: Optional[Tuple[T, ...]] = None - self.observers: list[Callable[..., None]] = [] - - async def wait(self, check_condition: Callable[..., bool], timeout: Optional[float] = None) -> Tuple[T, ...]: - async with self.condition: - await asyncio.wait_for(self.condition.wait_for(lambda: self.result is not None and check_condition(*self.result)), timeout) - assert self.result is not None # for type checking - return self.result - - def on_next(self, callback: Callable[..., None]) -> None: - self.observers.append(callback) - - def set(self, *args: T) -> None: - self.result = args - for observer in self.observers: - observer(*args) - asyncio.create_task(self.notify()) - - async def notify(self) -> None: - async with self.condition: - self.condition.notify_all() - - -class AsyncCallbackSystem(Generic[K, T]): - 
def __init__(self) -> None: - self.callbacks: Dict[K, AsyncCallback[T]] = {} - - def register(self, name: K) -> AsyncCallback[T]: - if name not in self.callbacks: - self.callbacks[name] = AsyncCallback[T]() - return self.callbacks[name] - - def deregister(self, name: K) -> None: - if name in self.callbacks: - del self.callbacks[name] - - def trigger(self, name: K, *args: T) -> None: - if name in self.callbacks: - self.callbacks[name].set(*args) - - def trigger_all(self, *args: T) -> None: - for callback in self.callbacks.values(): - callback.set(*args) - - -K = TypeVar('K', bound=str) -V = TypeVar('V') - - -class PrefixDict(Generic[K, V]): - def __init__(self): - self.items: Dict[K, V] = {} - - def add(self, key: K, value: V) -> None: - self.items[key] = value - - def find_prefix(self, argument: str) -> List[Tuple[K, V]]: - return [(key, value) for key, value in self.items.items() if argument.startswith(key)] - - def find_longest_prefix(self, argument: str) -> Optional[Tuple[K, V]]: - matches = self.find_prefix(argument) - if len(matches) == 0: - return None - - return max(matches, key=lambda x: len(x[0])) - - -def is_valid_uuid(val): - try: - uuid.UUID(str(val)) - return True - except ValueError: - return False - - -def get_or_create_node_id(): - NODE_ID_FILE = Path(os.path.dirname(os.path.abspath(__file__)))/".exo_node_id" - try: - if NODE_ID_FILE.is_file(): - with open(NODE_ID_FILE, "r") as f: - stored_id = f.read().strip() - if is_valid_uuid(stored_id): - if DEBUG >= 2: print(f"Retrieved existing node ID: {stored_id}") - return stored_id - else: - if DEBUG >= 2: print("Stored ID is not a valid UUID. Generating a new one.") - - new_id = str(uuid.uuid4()) - with open(NODE_ID_FILE, "w") as f: - f.write(new_id) - - if DEBUG >= 2: print(f"Generated and stored new node ID: {new_id}") - return new_id - except IOError as e: - if DEBUG >= 2: print(f"IO error creating node_id: {e}") - return str(uuid.uuid4()) - except Exception as e: - if DEBUG >= 2: print(f"Unexpected error creating node_id: {e}") - return str(uuid.uuid4()) - - -def pretty_print_bytes(size_in_bytes: int) -> str: - if size_in_bytes < 1024: - return f"{size_in_bytes} B" - elif size_in_bytes < 1024**2: - return f"{size_in_bytes / 1024:.2f} KB" - elif size_in_bytes < 1024**3: - return f"{size_in_bytes / (1024 ** 2):.2f} MB" - elif size_in_bytes < 1024**4: - return f"{size_in_bytes / (1024 ** 3):.2f} GB" - else: - return f"{size_in_bytes / (1024 ** 4):.2f} TB" - - -def pretty_print_bytes_per_second(bytes_per_second: int) -> str: - if bytes_per_second < 1024: - return f"{bytes_per_second} B/s" - elif bytes_per_second < 1024**2: - return f"{bytes_per_second / 1024:.2f} KB/s" - elif bytes_per_second < 1024**3: - return f"{bytes_per_second / (1024 ** 2):.2f} MB/s" - elif bytes_per_second < 1024**4: - return f"{bytes_per_second / (1024 ** 3):.2f} GB/s" - else: - return f"{bytes_per_second / (1024 ** 4):.2f} TB/s" - - -def get_all_ip_addresses(): - try: - ip_addresses = [] - for interface in netifaces.interfaces(): - ifaddresses = netifaces.ifaddresses(interface) - if netifaces.AF_INET in ifaddresses: - for link in ifaddresses[netifaces.AF_INET]: - ip = link['addr'] - ip_addresses.append(ip) - return list(set(ip_addresses)) - except: - if DEBUG >= 1: print("Failed to get all IP addresses. 
Defaulting to localhost.") - return ["localhost"] diff --git a/build/lib/exo/inference/__init__.py b/build/lib/exo/inference/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/build/lib/exo/inference/debug_inference_engine.py b/build/lib/exo/inference/debug_inference_engine.py deleted file mode 100644 index 27bcb592f..000000000 --- a/build/lib/exo/inference/debug_inference_engine.py +++ /dev/null @@ -1,59 +0,0 @@ -from exo.inference.inference_engine import InferenceEngine -from exo.inference.shard import Shard -from exo.inference.tinygrad.inference import TinygradDynamicShardInferenceEngine -import asyncio -import numpy as np - - -# An inference engine should work the same for any number of Shards, as long as the Shards are continuous. -async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str): - from exo.inference.tinygrad.inference import Tokenizer - from pathlib import Path - - _tokenizer = Tokenizer(str(Path(model_id)/"tokenizer.model")) - - prompt = "In a single word only, what is the last name of the president of the United States? " - resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt("A", shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), prompt=prompt) - next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( - "A", - shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), - input_data=resp_full, - inference_state=inference_state_full, - ) - - resp1, inference_state_1, _ = await inference_engine_1.infer_prompt("B", shard=Shard(model_id=model_id, start_layer=0, end_layer=30, n_layers=32), prompt=prompt) - resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( - "B", - shard=Shard(model_id=model_id, start_layer=31, end_layer=31, n_layers=32), - input_data=resp1, - inference_state=inference_state_1, - ) - resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( - "B", - shard=Shard(model_id=model_id, start_layer=0, end_layer=30, n_layers=32), - input_data=resp2, - inference_state=inference_state_2, - ) - resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( - "B", - shard=Shard(model_id=model_id, start_layer=31, end_layer=31, n_layers=32), - input_data=resp3, - inference_state=inference_state_3, - ) - - print(f"{resp2=}") - print(f"full: {_tokenizer.decode(resp_full)}") - print(f"next full: {_tokenizer.decode(next_resp_full)}") - print(f"resp2: {_tokenizer.decode(resp2)}") - print(f"{resp4=}") - print(f"resp4: {_tokenizer.decode(resp4)}") - - assert np.array_equal(resp_full, resp2) - assert np.array_equal(next_resp_full, resp4) - - -asyncio.run(test_inference_engine( - TinygradDynamicShardInferenceEngine(), - TinygradDynamicShardInferenceEngine(), - "llama3-8b-sfr", -)) diff --git a/build/lib/exo/inference/inference_engine.py b/build/lib/exo/inference/inference_engine.py deleted file mode 100644 index c5dfc0e30..000000000 --- a/build/lib/exo/inference/inference_engine.py +++ /dev/null @@ -1,34 +0,0 @@ -import numpy as np -import os - -from typing import Tuple, Optional -from abc import ABC, abstractmethod -from .shard import Shard - - -class InferenceEngine(ABC): - @abstractmethod - async def infer_prompt(self, request_id: str, shard: Shard, prompt: str, image_str: Optional[str] = None, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: - pass - - @abstractmethod - async def infer_tensor(self, request_id: str, shard: Shard, input_data: 
np.ndarray, inference_state: Optional[str] = None) -> Tuple[np.ndarray, str, bool]: - pass - - -def get_inference_engine(inference_engine_name: str, shard_downloader: 'ShardDownloader'): - if inference_engine_name == "mlx": - from exo.inference.mlx.sharded_inference_engine import MLXDynamicShardInferenceEngine - - return MLXDynamicShardInferenceEngine(shard_downloader) - elif inference_engine_name == "tinygrad": - from exo.inference.tinygrad.inference import TinygradDynamicShardInferenceEngine - import tinygrad.helpers - tinygrad.helpers.DEBUG.value = int(os.getenv("TINYGRAD_DEBUG", default="0")) - - return TinygradDynamicShardInferenceEngine(shard_downloader) - elif inference_engine_name == "torch": - from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine - return PyTorchDynamicShardInferenceEngine(shard_downloader) - else: - raise ValueError(f"Inference engine {inference_engine_name} not supported") diff --git a/build/lib/exo/inference/mlx/__init__.py b/build/lib/exo/inference/mlx/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/build/lib/exo/inference/mlx/models/__init__.py b/build/lib/exo/inference/mlx/models/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/build/lib/exo/inference/mlx/models/base.py b/build/lib/exo/inference/mlx/models/base.py deleted file mode 100644 index a1f1878cb..000000000 --- a/build/lib/exo/inference/mlx/models/base.py +++ /dev/null @@ -1,9 +0,0 @@ -from typing import Optional -import mlx.core as mx -import mlx.nn as nn -from mlx_lm.models.base import KVCache - - -class IdentityBlock(nn.Module): - def __call__(self, x: mx.array, mask: Optional[mx.array] = None, cache: Optional[KVCache] = None) -> mx.array: - return x diff --git a/build/lib/exo/inference/mlx/models/deepseek_v2.py b/build/lib/exo/inference/mlx/models/deepseek_v2.py deleted file mode 100644 index 9ea271edf..000000000 --- a/build/lib/exo/inference/mlx/models/deepseek_v2.py +++ /dev/null @@ -1,127 +0,0 @@ -from dataclasses import dataclass, field -from typing import Optional - -import mlx.core as mx -import mlx.nn as nn - -from mlx_lm.models.base import KVCache -from mlx_lm.models.deepseek_v2 import ModelArgs, DeepseekV2DecoderLayer -from .base import IdentityBlock -from exo.inference.shard import Shard - - -@dataclass -class ModelArgs(ModelArgs): - shard: Shard = field(default_factory=lambda: Shard("", 0, 0, 0)) - - def __post_init__(self): - if isinstance(self.shard, Shard): - return - if not isinstance(self.shard, dict): - raise TypeError(f"Expected shard to be a Shard instance or a dict, got {type(self.shard)} instead") - - self.shard = Shard(**self.shard) - - -class DeepseekV2Model(nn.Module): - def __init__(self, config: ModelArgs): - super().__init__() - self.args = config - self.num_hidden_layers = config.num_hidden_layers - self.vocab_size = config.vocab_size - if self.args.shard.is_first_layer(): - self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size) - - self.layers = [] - for i in range(self.num_hidden_layers): - if self.args.shard.start_layer <= i <= self.args.shard.end_layer: - self.layers.append(DeepseekV2DecoderLayer(config, i)) - else: - self.layers.append(IdentityBlock()) - - if self.args.shard.is_last_layer(): - self.norm = nn.RMSNorm(config.hidden_size, eps=config.rms_norm_eps) - - def __call__( - self, - x: mx.array, - cache: Optional[KVCache] = None, - ) -> mx.array: - if self.args.shard.is_first_layer(): - h = self.embed_tokens(x) - else: - h = x - - mask = None - T = h.shape[1] - 
if T > 1: - mask = nn.MultiHeadAttention.create_additive_causal_mask(T) - mask = mask.astype(h.dtype) - - if cache is None: - cache = [None]*len(self.layers) - - for layer, c in zip(self.layers, cache): - h = layer(h, mask, c) - - if self.args.shard.is_last_layer(): - h = self.norm(h) - return h - - -class Model(nn.Module): - def __init__(self, config: ModelArgs): - super().__init__() - self.args = config - self.model_type = config.model_type - self.model = DeepseekV2Model(config) - if self.args.shard.is_last_layer(): - self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) - - def __call__( - self, - inputs: mx.array, - cache: Optional[KVCache] = None, - ): - out = self.model(inputs, cache) - if self.args.shard.is_last_layer(): - return self.lm_head(out) - return out - - def sanitize(self, weights): - shard_state_dict = {} - - for key, value in weights.items(): - if key.startswith('model.layers.'): - layer_num = int(key.split('.')[2]) - if self.args.shard.start_layer <= layer_num <= self.args.shard.end_layer: - shard_state_dict[key] = value - elif self.args.shard.is_first_layer() and key.startswith('model.embed_tokens'): - shard_state_dict[key] = value - elif self.args.shard.is_last_layer() and (key.startswith('model.norm') or key.startswith('lm_head')): - shard_state_dict[key] = value - - for l in range(self.args.num_hidden_layers): - prefix = f"model.layers.{l}" - for n, m in [("w1", "gate_proj"), ("w2", "down_proj"), ("w3", "up_proj")]: - for k in ["weight", "scales", "biases"]: - if f"{prefix}.mlp.experts.0.{m}.{k}" in shard_state_dict: - to_join = [shard_state_dict.pop(f"{prefix}.mlp.experts.{e}.{m}.{k}") for e in range(self.args.n_routed_experts)] - shard_state_dict[f"{prefix}.mlp.switch_mlp.{m}.{k}"] = mx.stack(to_join) - - return shard_state_dict - - @property - def layers(self): - return self.model.layers - - @property - def head_dim(self): - return ( - self.args.qk_nope_head_dim + self.args.qk_rope_head_dim, - self.args.v_head_dim, - ) - - @property - def n_kv_heads(self): - return self.args.num_key_value_heads diff --git a/build/lib/exo/inference/mlx/models/llama.py b/build/lib/exo/inference/mlx/models/llama.py deleted file mode 100644 index 719d6a886..000000000 --- a/build/lib/exo/inference/mlx/models/llama.py +++ /dev/null @@ -1,125 +0,0 @@ -from dataclasses import dataclass, field - -import mlx.core as mx -import mlx.nn as nn - -from mlx_lm.models.base import create_attention_mask -from mlx_lm.models.llama import TransformerBlock, ModelArgs - -from ...shard import Shard -from .base import IdentityBlock - - -@dataclass -class ModelArgs(ModelArgs): - shard: Shard = field(default_factory=lambda: Shard("", 0, 0, 0)) - - def __post_init__(self): - super().__post_init__() # Ensure parent initializations are respected - - if isinstance(self.shard, Shard): - return - if not isinstance(self.shard, dict): - raise TypeError(f"Expected shard to be a Shard instance or a dict, got {type(self.shard)} instead") - - self.shard = Shard(**self.shard) - - -class LlamaModel(nn.Module): - def __init__(self, args: ModelArgs): - super().__init__() - self.args = args - self.vocab_size = args.vocab_size - self.num_hidden_layers = args.num_hidden_layers - assert self.vocab_size > 0 - if self.args.shard.is_first_layer(): - self.embed_tokens = nn.Embedding(args.vocab_size, args.hidden_size) - self.layers = [] - for i in range(self.num_hidden_layers): - if self.args.shard.start_layer <= i <= self.args.shard.end_layer: - self.layers.append(TransformerBlock(args=args)) - else: - 
self.layers.append(IdentityBlock()) - if self.args.shard.is_last_layer(): - self.norm = nn.RMSNorm(args.hidden_size, eps=args.rms_norm_eps) - - def __call__( - self, - inputs: mx.array, - cache=None, - ): - if self.args.shard.is_first_layer(): - h = self.embed_tokens(inputs) - else: - h = inputs - - mask = None - if h.shape[1] > 1: - mask = create_attention_mask(h, cache) - - if cache is None: - cache = [None]*len(self.layers) - - for layer, c in zip(self.layers, cache): - h = layer(h, mask, cache=c) - - if self.args.shard.is_last_layer(): - h = self.norm(h) - return h - - -class Model(nn.Module): - def __init__(self, args: ModelArgs): - super().__init__() - self.args = args - self.model_type = args.model_type - self.model = LlamaModel(args) - if self.args.shard.is_last_layer(): - if not args.tie_word_embeddings: - self.lm_head = nn.Linear(args.hidden_size, args.vocab_size, bias=False) - - def __call__( - self, - inputs: mx.array, - cache=None, - ): - out = self.model(inputs, cache) - if self.args.shard.is_last_layer(): - if self.args.tie_word_embeddings: - out = self.model.embed_tokens.as_linear(out) - else: - out = self.lm_head(out) - return out - - def sanitize(self, weights): - shard_state_dict = {} - - for key, value in weights.items(): - if "self_attn.rotary_emb.inv_freq" in key: - continue - if key.startswith('model.layers.'): - layer_num = int(key.split('.')[2]) - if self.args.shard.start_layer <= layer_num <= self.args.shard.end_layer: - shard_state_dict[key] = value - elif self.args.shard.is_first_layer() and key.startswith('model.embed_tokens'): - shard_state_dict[key] = value - elif (self.args.shard.is_last_layer() and self.args.tie_word_embeddings) and key.startswith('model.embed_tokens'): - shard_state_dict[key] = value - elif (self.args.shard.is_last_layer() and not self.args.tie_word_embeddings) and key.startswith('lm_head'): - shard_state_dict[key] = value - elif self.args.shard.is_last_layer() and (key.startswith('model.norm')): - shard_state_dict[key] = value - - return shard_state_dict - - @property - def layers(self): - return self.model.layers - - @property - def head_dim(self): - return (self.args.head_dim or self.args.hidden_size // self.args.num_attention_heads) - - @property - def n_kv_heads(self): - return self.args.num_key_value_heads diff --git a/build/lib/exo/inference/mlx/models/llava.py b/build/lib/exo/inference/mlx/models/llava.py deleted file mode 100644 index b734b09b4..000000000 --- a/build/lib/exo/inference/mlx/models/llava.py +++ /dev/null @@ -1,585 +0,0 @@ -# Copyright © 2024 Apple Inc. 
- -import math -import inspect -from dataclasses import dataclass, field -from typing import Optional, Dict, Union - -import mlx.core as mx -import mlx.nn as nn -from mlx_lm.models.base import BaseModelArgs, KVCache -from exo.inference.shard import Shard -from .base import IdentityBlock -import numpy as np - - -@dataclass -class VisionConfig: - model_type: str - num_hidden_layers: int = 24 - hidden_size: int = 1024 - intermediate_size: int = 4096 - num_attention_heads: int = 16 - image_size: int = 336 - patch_size: int = 14 - projection_dim: int = 768 - vocab_size: int = 32000 - num_channels: int = 3 - layer_norm_eps: float = 1e-5 - - @classmethod - def from_dict(cls, params): - return cls(**{k: v for k, v in params.items() if k in inspect.signature(cls).parameters}) - - -class VisionAttention(nn.Module): - def __init__( - self, - dims: int, - num_heads: int, - query_input_dims: Optional[int] = None, - key_input_dims: Optional[int] = None, - value_input_dims: Optional[int] = None, - value_dims: Optional[int] = None, - value_output_dims: Optional[int] = None, - bias: bool = False, - ): - super().__init__() - - if (dims % num_heads) != 0: - raise ValueError("The input feature dimensions should be divisible by the " - f"number of heads ({dims} % {num_heads}) != 0") - - query_input_dims = query_input_dims or dims - key_input_dims = key_input_dims or dims - value_input_dims = value_input_dims or key_input_dims - value_dims = value_dims or dims - value_output_dims = value_output_dims or dims - - self.num_heads = num_heads - self.q_proj = nn.Linear(query_input_dims, dims, bias=bias) - self.k_proj = nn.Linear(key_input_dims, dims, bias=bias) - self.v_proj = nn.Linear(value_input_dims, value_dims, bias=bias) - self.out_proj = nn.Linear(value_dims, value_output_dims, bias=bias) - - def __call__(self, queries, keys, values, mask=None): - queries = self.q_proj(queries) - keys = self.k_proj(keys) - values = self.v_proj(values) - - num_heads = self.num_heads - B, L, D = queries.shape - _, S, _ = keys.shape - queries = queries.reshape(B, L, num_heads, -1).transpose(0, 2, 1, 3) - keys = keys.reshape(B, S, num_heads, -1).transpose(0, 2, 3, 1) - values = values.reshape(B, S, num_heads, -1).transpose(0, 2, 1, 3) - - scale = math.sqrt(1/queries.shape[-1]) - scores = (queries*scale) @ keys - if mask is not None: - scores = scores + mask.astype(scores.dtype) - scores = mx.softmax(scores, axis=-1) - values_hat = (scores @ values).transpose(0, 2, 1, 3).reshape(B, L, -1) - - return self.out_proj(values_hat) - - -class VisionMLP(nn.Module): - def __init__(self, config: VisionConfig): - super().__init__() - self.activation_fn = nn.GELU(approx="fast") - self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size) - self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size) - - def __call__(self, x: mx.array) -> mx.array: - x = self.activation_fn(self.fc1(x)) - x = self.fc2(x) - return x - - -class VisionEncoderLayer(nn.Module): - def __init__(self, config: VisionConfig): - super().__init__() - self.embed_dim = config.hidden_size - self.self_attn = VisionAttention(config.hidden_size, config.num_attention_heads, bias=True) - self.layer_norm1 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) - self.mlp = VisionMLP(config) - self.layer_norm2 = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_eps) - - def __call__(self, x: mx.array, mask: Optional[mx.array] = None) -> mx.array: - y = self.layer_norm1(x) - y = self.self_attn(y, y, y, mask) - x = x + y - y = self.layer_norm2(x) - y = self.mlp(y) - 
return x + y - - -class VisionEncoder(nn.Module): - def __init__(self, config: VisionConfig): - super().__init__() - self.layers = [VisionEncoderLayer(config) for _ in range(config.num_hidden_layers)] - - -class VisionEmbeddings(nn.Module): - def __init__(self, config: VisionConfig): - super().__init__() - self.config = config - self.embed_dim = config.hidden_size - self.image_size = config.image_size - self.patch_size = config.patch_size - - self.class_embedding = mx.zeros((config.hidden_size,)) - - self.patch_embedding = nn.Conv2d( - in_channels=config.num_channels, - out_channels=self.embed_dim, - kernel_size=self.patch_size, - stride=self.patch_size, - bias=False, - ) - - self.num_patches = (self.image_size // self.patch_size)**2 - self.num_positions = self.num_patches + 1 - self.position_embedding = nn.Embedding(self.num_positions, self.embed_dim) - - def __call__(self, x: mx.array) -> mx.array: - batch_size = x.shape[0] - patch_embeddings = self.patch_embedding(x) - patch_embeddings = mx.flatten(patch_embeddings, start_axis=1, end_axis=2) - embed_dim = patch_embeddings.shape[-1] - cls_embeddings = mx.broadcast_to(self.class_embedding, (batch_size, 1, embed_dim)) - embeddings = mx.concatenate((cls_embeddings, patch_embeddings), axis=1) - embeddings += self.position_embedding.weight - return embeddings - - -class ClipVisionModel(nn.Module): - def __init__(self, config: VisionConfig): - super().__init__() - self.embeddings = VisionEmbeddings(config) - self.pre_layrnorm = nn.LayerNorm(config.hidden_size) - self.encoder = VisionEncoder(config) - self.post_layernorm = nn.LayerNorm(config.hidden_size) - - def __call__( - self, - x: mx.array, - output_hidden_states: Optional[bool] = None, - ) -> mx.array: - x = self.embeddings(x) - x = self.pre_layrnorm(x) - - encoder_states = (x,) if output_hidden_states else None - - for l in self.encoder.layers: - x = l(x, mask=None) - if output_hidden_states: - encoder_states = encoder_states + (x,) - - pooler_output = self.post_layernorm(x[:, 0, :]) - return pooler_output, x, encoder_states - - -class VisionModel(nn.Module): - def __init__(self, config: VisionConfig): - super().__init__() - - self.model_type = config.model_type - if self.model_type != "clip_vision_model": - raise ValueError(f"Unsupported model type: {self.model_type}") - - self.vision_model = ClipVisionModel(config) - - def __call__(self, x: mx.array, output_hidden_states: Optional[bool] = None) -> mx.array: - return self.vision_model(x, output_hidden_states) - - def sanitize(self, weights): - sanitized_weights = {} - for k, v in weights.items(): - if "position_ids" in k: - # Remove unused position_ids - continue - elif "patch_embedding.weight" in k: - # PyTorch conv2d weight tensors have shape: - # [out_channels, in_channels, kH, KW] - # MLX conv2d expects the weight be of shape: - # [out_channels, kH, KW, in_channels] - sanitized_weights[k] = v.transpose(0, 2, 3, 1) - else: - sanitized_weights[k] = v - - return sanitized_weights - - -@dataclass -class TextConfig: - model_type: str - hidden_size: int = 4096 - num_hidden_layers: int = 32 - intermediate_size: int = 11008 - num_attention_heads: int = 32 - head_dim: int = None - rms_norm_eps: float = 1e-6 - vocab_size: int = 32000 - num_key_value_heads: int = None - rope_theta: float = 10000 - rope_traditional: bool = False - rope_scaling: Optional[Dict[str, Union[float, str]]] = None - - @classmethod - def from_dict(cls, params): - return cls(**{k: v for k, v in params.items() if k in inspect.signature(cls).parameters}) - - def 
__post_init__(self): - if self.num_key_value_heads is None: - self.num_key_value_heads = self.num_attention_heads - - if self.head_dim is None: - self.head_dim = self.hidden_size // self.num_attention_heads - - if self.model_type is None: - self.model_type = "llama" - - if self.rope_scaling: - required_keys = {"factor", "type"} - if not all(key in self.rope_scaling for key in required_keys): - raise ValueError(f"rope_scaling must contain keys {required_keys}") - - if self.rope_scaling["type"] != "linear": - raise ValueError("rope_scaling 'type' currently only supports 'linear'") - - -class TextAttention(nn.Module): - def __init__(self, config: TextConfig): - super().__init__() - - dim = config.hidden_size - self.n_heads = n_heads = config.num_attention_heads - self.n_kv_heads = n_kv_heads = config.num_key_value_heads - - self.repeats = n_heads // n_kv_heads - - head_dim = config.hidden_size // n_heads - self.scale = head_dim**-0.5 - - self.q_proj = nn.Linear(dim, n_heads*head_dim, bias=False) - self.k_proj = nn.Linear(dim, n_kv_heads*head_dim, bias=False) - self.v_proj = nn.Linear(dim, n_kv_heads*head_dim, bias=False) - self.o_proj = nn.Linear(n_heads*head_dim, dim, bias=False) - - rope_scale = (1/config.rope_scaling["factor"] if config.rope_scaling is not None and config.rope_scaling["type"] == "linear" else 1) - self.rope = nn.RoPE( - head_dim, - traditional=config.rope_traditional, - base=config.rope_theta, - scale=rope_scale, - ) - - def __call__( - self, - x: mx.array, - mask: Optional[mx.array] = None, - cache: Optional[KVCache] = None, - ) -> mx.array: - B, L, D = x.shape - - queries, keys, values = self.q_proj(x), self.k_proj(x), self.v_proj(x) - - # Prepare the queries, keys and values for the attention computation - queries = queries.reshape(B, L, self.n_heads, -1).transpose(0, 2, 1, 3) - keys = keys.reshape(B, L, self.n_kv_heads, -1).transpose(0, 2, 1, 3) - values = values.reshape(B, L, self.n_kv_heads, -1).transpose(0, 2, 1, 3) - - if cache is not None: - queries = self.rope(queries, offset=cache.offset) - keys = self.rope(keys, offset=cache.offset) - keys, values = cache.update_and_fetch(keys, values) - else: - queries = self.rope(queries) - keys = self.rope(keys) - - output = mx.fast.scaled_dot_product_attention(queries, keys, values, scale=self.scale, mask=mask) - output = output.transpose(0, 2, 1, 3).reshape(B, L, -1) - return self.o_proj(output) - - -class TextMLP(nn.Module): - def __init__(self, dim, hidden_dim): - super().__init__() - self.gate_proj = nn.Linear(dim, hidden_dim, bias=False) - self.down_proj = nn.Linear(hidden_dim, dim, bias=False) - self.up_proj = nn.Linear(dim, hidden_dim, bias=False) - - def __call__(self, x) -> mx.array: - return self.down_proj(nn.silu(self.gate_proj(x))*self.up_proj(x)) - - -class TransformerBlock(nn.Module): - def __init__(self, config: TextConfig): - super().__init__() - self.num_attention_heads = config.num_attention_heads - self.hidden_size = config.hidden_size - self.self_attn = TextAttention(config) - self.mlp = TextMLP(config.hidden_size, config.intermediate_size) - self.input_layernorm = nn.RMSNorm(config.hidden_size, eps=config.rms_norm_eps) - self.post_attention_layernorm = nn.RMSNorm(config.hidden_size, eps=config.rms_norm_eps) - self.config = config - - def __call__( - self, - x: mx.array, - mask: Optional[mx.array] = None, - cache: Optional[KVCache] = None, - ) -> mx.array: - r = self.self_attn(self.input_layernorm(x), mask, cache) - h = x + r - r = self.mlp(self.post_attention_layernorm(h)) - out = h + r - return out - - 
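
Before the Llama wrapper below, it may help to see the sharding pattern it implements in isolation: only layer indices inside the shard's [start_layer, end_layer] range receive a real transformer block, while every other index becomes a pass-through. This is a plain-Python sketch; ShardSpec and make_block are illustrative stand-ins for the real Shard and block classes, not code from this patch.

from dataclasses import dataclass
from typing import Callable, List


@dataclass
class ShardSpec:
    # Stand-in for exo's Shard (the real one also carries model_id and related metadata).
    start_layer: int
    end_layer: int
    n_layers: int


def build_layers(shard: ShardSpec, make_block: Callable[[int], Callable[[float], float]]) -> List[Callable[[float], float]]:
    # Indices inside [start_layer, end_layer] get a real block; everything else is a
    # pass-through, mirroring how out-of-shard layers are replaced with identity blocks.
    return [
        make_block(i) if shard.start_layer <= i <= shard.end_layer else (lambda h: h)
        for i in range(shard.n_layers)
    ]


if __name__ == "__main__":
    shard = ShardSpec(start_layer=0, end_layer=1, n_layers=4)
    layers = build_layers(shard, lambda i: (lambda h, _i=i: h + _i))
    h = 0.0
    for layer in layers:
        h = layer(h)
    print(h)  # 1.0: only layers 0 and 1 ran (adding 0, then 1); layers 2 and 3 are identity
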
-class Llama(nn.Module): - def __init__(self, config: TextConfig, shard: Shard): - super().__init__() - self.config = config - self.shard = shard - self.vocab_size = config.vocab_size - self.model_type = config.model_type - self.num_hidden_layers = config.num_hidden_layers - self.num_key_value_heads = config.num_key_value_heads - self.head_dim = config.head_dim - assert self.vocab_size > 0 - if self.shard.is_first_layer(): - self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size) - self.layers = [] - for i in range(self.num_hidden_layers): - if self.shard.start_layer <= i <= self.shard.end_layer: - self.layers.append(TransformerBlock(config=config)) - else: - self.layers.append(IdentityBlock()) - if self.shard.is_last_layer(): - self.norm = nn.RMSNorm(config.hidden_size, eps=config.rms_norm_eps) - - def __call__( - self, - inputs: mx.array, - cache=None, - inputs_embeds=None, - ): - # for passing merged input embeddings - if inputs_embeds is None: - if self.shard.is_first_layer(): - h = self.embed_tokens(inputs) - else: - h = inputs - else: - h = inputs_embeds - - mask = None - if h.shape[1] > 1: - mask = nn.MultiHeadAttention.create_additive_causal_mask(h.shape[1]) - mask = mask.astype(h.dtype) - - if cache is None: - cache = [None]*len(self.layers) - - for layer, c in zip(self.layers, cache): - h = layer(h, mask, c) - - if self.shard.is_last_layer(): - h = self.norm(h) - return h - - -class LanguageModel(nn.Module): - def __init__(self, config: TextConfig, shard: Shard): - super().__init__() - self.model_type = config.model_type - if self.model_type != "llama": - raise ValueError(f"Model type {self.model_type} not supported. Currently only 'llama' is supported") - self.shard = shard - self.model = Llama(config, shard) - if self.shard.is_last_layer(): - self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) - - def __call__( - self, - inputs: mx.array, - cache=None, - inputs_embeds=None, - ): - out = self.model(inputs, cache, inputs_embeds) - if self.shard.is_last_layer(): - out = self.lm_head(out) - return out - - def sanitize(self, weights): - shard_state_dict = {} - for key, value in weights.items(): - if "self_attn.rotary_emb.inv_freq" in key: - continue - - if key.startswith('language_model.model.layers.'): - layer_num = int(key.split('.')[3]) - if layer_num < self.shard.start_layer or layer_num > self.shard.end_layer: - continue - if not self.shard.is_first_layer() and key.startswith('language_model.model.embed_tokens'): - continue - elif not self.shard.is_last_layer() and (key.startswith('language_model.model.norm') or key.startswith('language_model.lm_head')): - continue - - shard_state_dict[key] = value - - return shard_state_dict - - -@dataclass -class LlaVAConfig(BaseModelArgs): - text_config: TextConfig - vision_config: VisionConfig = None - model_type: str = "llava" - ignore_index: int = -100 - image_token_index: int = 32000 - vision_feature_select_strategy: str = "default" - vision_feature_layer: int = -2 - vocab_size: int = 32000 - - @classmethod - def from_dict(cls, params): - updated_params = {} - class_params = inspect.signature(cls).parameters - for k, v in params.items(): - if k in class_params: - if k in ["text_config", "vision_config"]: - v = class_params[k].annotation.from_dict(v) - updated_params.update({k: v}) - - return cls(**updated_params) - - -@dataclass -class ModelArgs(LlaVAConfig): - shard: Shard = field(default_factory=lambda: Shard("", 0, 0, 0)) - - def __post_init__(self): - if isinstance(self.shard, dict): - 
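        # Note: the shard may arrive here as a plain dict, because load_model_shard()
        # (in sharded_utils.py elsewhere in this patch) injects it into the model config as
        # {"model_id", "start_layer", "end_layer", "n_layers"}; it is coerced back into a
        # Shard instance before the first/last-layer checks below.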
self.shard = Shard(**self.shard) - - if not isinstance(self.shard, Shard): - raise TypeError(f"Expected shard to be a Shard instance or a dict, got {type(self.shard)} instead") - - if not self.shard.is_first_layer(): - self.vision_config = None - - -class LlavaMultiModalProjector(nn.Module): - def __init__(self, config: LlaVAConfig): - super().__init__() - self.linear_1 = nn.Linear(config.vision_config.hidden_size, config.text_config.hidden_size, bias=True) - self.gelu = nn.GELU() - self.linear_2 = nn.Linear(config.text_config.hidden_size, config.text_config.hidden_size, bias=True) - - def __call__(self, x: mx.array) -> mx.array: - x = self.linear_1(x) - x = self.gelu(x) - x = self.linear_2(x) - return x - - -class Model(nn.Module): - def __init__(self, config: ModelArgs): - super().__init__() - self.config = config - self.model_type = config.model_type - if config.vision_config: - self.vision_tower = VisionModel(config.vision_config) - self.multi_modal_projector = LlavaMultiModalProjector(config) - self.vision_feature_layer = config.vision_feature_layer - self.vision_feature_select_strategy = config.vision_feature_select_strategy - self.language_model = LanguageModel(config.text_config, config.shard) - - def get_input_embeddings( - self, - input_ids: Optional[mx.array] = None, - pixel_values: Optional[mx.array] = None, - ): - if pixel_values is None: - return self.language_model(input_ids) - - # Get the input embeddings from the language model - inputs_embeds = self.language_model.model.embed_tokens(input_ids) - - # Get the ouptut hidden states from the vision model - *_, hidden_states = self.vision_tower(pixel_values.transpose(0, 2, 3, 1), output_hidden_states=True) - - # Select the hidden states from the desired layer - selected_image_feature = hidden_states[self.vision_feature_layer] - - if self.vision_feature_select_strategy == "default": - selected_image_feature = selected_image_feature[:, 1:] - elif self.vision_feature_select_strategy == "full": - selected_image_feature = selected_image_feature - else: - raise ValueError("Unexpected feature selection strategy: " - f"{self.vision_feature_select_strategy}") - - # Pass image features through the multi-modal projector - image_features = self.multi_modal_projector(selected_image_feature) - - # Insert special image tokens in the input_ids - final_inputs_embeds = self._merge_input_ids_with_image_features(image_features, inputs_embeds, input_ids) - return final_inputs_embeds - - def _merge_input_ids_with_image_features(self, image_features, inputs_embeds, input_ids): - image_token_index = self.config.image_token_index - num_images, num_image_patches, embed_dim = image_features.shape - - # Positions of tokens in input_ids, assuming batch size is 1 - image_positions = np.where(input_ids[0] == image_token_index)[0].tolist() - - if len(image_positions) != num_images: - raise ValueError(f"The number of image tokens ({len(image_positions)}) does not " - f" match the number of image inputs ({num_images}).") - - text_segments = [] - start_idx = 0 - - for position in image_positions: - text_segments.append(inputs_embeds[:, start_idx:position]) - start_idx = position + 1 - - image_embeddings = mx.split(image_features, image_features.shape[0]) - final_embeddings = [v for p in zip(text_segments, image_embeddings) for v in p] - final_embeddings += [inputs_embeds[:, start_idx:]] - - # Create a final embedding of shape - # (1, num_image_patches*num_images + sequence_len, embed_dim) - return mx.concatenate(final_embeddings, axis=1) - - def __call__(self, 
input_ids: mx.array, pixel_values: mx.array = None, cache=None): - input_embddings = None - if pixel_values is not None: - input_embddings = self.get_input_embeddings(input_ids, pixel_values) - logits = self.language_model(input_ids, cache=cache, inputs_embeds=input_embddings) - return logits - - def sanitize(self, weights): - if self.config.vision_config: - weights = self.vision_tower.sanitize(weights) - else: - weights = {k: v for k, v in weights.items() if not k.startswith(('vision_tower', 'multi_modal_projector', 'vision_feature_layer', 'vision_feature_select_strategy'))} - weights = self.language_model.sanitize(weights) - return weights - - @property - def layers(self): - return self.language_model.model.layers - - @property - def head_dim(self): - return (self.language_model.model.head_dim or self.language_model.model.hidden_size // self.language_model.model.num_attention_heads) - - @property - def n_kv_heads(self): - return self.language_model.model.num_key_value_heads diff --git a/build/lib/exo/inference/mlx/sharded_inference_engine.py b/build/lib/exo/inference/mlx/sharded_inference_engine.py deleted file mode 100644 index 40cabfeb6..000000000 --- a/build/lib/exo/inference/mlx/sharded_inference_engine.py +++ /dev/null @@ -1,40 +0,0 @@ -import numpy as np -import mlx.core as mx -from ..inference_engine import InferenceEngine -from .sharded_model import StatefulShardedModel -from .sharded_utils import load_shard, get_image_from_str -from ..shard import Shard -from typing import Optional -from exo.download.shard_download import ShardDownloader - - -class MLXDynamicShardInferenceEngine(InferenceEngine): - def __init__(self, shard_downloader: ShardDownloader): - self.shard = None - self.shard_downloader = shard_downloader - - async def infer_prompt(self, request_id: str, shard: Shard, prompt: str, image_str: Optional[str] = None, inference_state: Optional[str] = None) -> (np.ndarray, str, bool): - await self.ensure_shard(shard) - if image_str: - image = await get_image_from_str(image_str) - inputs = self.tokenizer(prompt, image, return_tensors="np") - pixel_values = mx.array(inputs["pixel_values"]) - input_ids = mx.array(inputs["input_ids"]) - output_data: np.ndarray = np.array(self.stateful_sharded_model.step(request_id, input_ids, pixel_values)) - else: - output_data: np.ndarray = np.array(self.stateful_sharded_model.step(request_id, mx.array(self.tokenizer.encode(prompt)))) - return output_data, "", output_data.size == 1 and output_data.item() == self.tokenizer.eos_token_id - - async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray, inference_state: Optional[str] = None) -> (np.ndarray, str, bool): - await self.ensure_shard(shard) - output_data: np.ndarray = np.array(self.stateful_sharded_model.step(request_id, mx.array(input_data))) - return output_data, "", output_data.size == 1 and output_data.item() == self.tokenizer.eos_token_id - - async def ensure_shard(self, shard: Shard): - if self.shard == shard: - return - - model_path = await self.shard_downloader.ensure_shard(shard) - model_shard, self.tokenizer = await load_shard(model_path, shard) - self.stateful_sharded_model = StatefulShardedModel(shard, model_shard) - self.shard = shard diff --git a/build/lib/exo/inference/mlx/sharded_model.py b/build/lib/exo/inference/mlx/sharded_model.py deleted file mode 100644 index c4570fbf6..000000000 --- a/build/lib/exo/inference/mlx/sharded_model.py +++ /dev/null @@ -1,86 +0,0 @@ -from typing import Dict, Generator, Optional, Tuple -from collections import 
OrderedDict - -import mlx.core as mx -import mlx.nn as nn -from mlx_lm.models.base import KVCache, RotatingKVCache -from mlx_lm.sample_utils import top_p_sampling - -from ..shard import Shard - - -class StatefulShardedModel: - def __init__(self, shard: Shard, model: nn.Module, max_kv_size: int = 1024, max_caches: int = 2): - self.shard = shard - self.model = model - self.max_kv_size = max_kv_size - self.max_caches = max_caches - self.caches = OrderedDict() - - def step( - self, - request_id: str, - x, - pixel_values=None, - temp: float = 0.0, - top_p: float = 1.0, - logit_bias: Optional[Dict[int, float]] = None, - ) -> Generator[Tuple[mx.array, mx.array], None, None]: - def sample(logits: mx.array) -> Tuple[mx.array, float]: - if logit_bias: - indices = mx.array(list(logit_bias.keys())) - values = mx.array(list(logit_bias.values())) - logits[:, indices] += values - - if temp == 0: - token = mx.argmax(logits, axis=-1) - else: - if top_p > 0 and top_p < 1.0: - token = top_p_sampling(logits, top_p, temp) - else: - token = mx.random.categorical(logits*(1/temp)) - - return token - - y = x - - if request_id not in self.caches: - self.init_cache(request_id) - else: - self.caches.move_to_end(request_id) - - cache = self.caches[request_id] - - if pixel_values is None: - output = self.model(y[None] if self.shard.is_first_layer() else y, cache=cache) - else: - output = self.model(y, pixel_values=pixel_values, cache=cache) - - if self.shard.is_last_layer(): - logits = output[:, -1, :] - y = sample(logits) - return y - else: - return output - - def __call__( - self, - request_id: str, - x, - temp: float = 0.0, - top_p: float = 1.0, - logit_bias: Optional[Dict[int, float]] = None, - ) -> Generator[Tuple[mx.array, mx.array], None, None]: - return self.step(request_id, x, temp=temp, top_p=top_p, logit_bias=logit_bias) - - def init_cache(self, request_id: str): - kv_heads = ([self.model.n_kv_heads]*len(self.model.layers) if isinstance(self.model.n_kv_heads, int) else self.model.n_kv_heads) - if self.max_kv_size is not None: - cache = [RotatingKVCache(self.model.head_dim, n, max_size=self.max_kv_size, keep=4) for n in kv_heads] - else: - cache = [KVCache(self.model.head_dim, n) for n in kv_heads] - - if len(self.caches) >= self.max_caches: - self.caches.popitem(last=False) - - self.caches[request_id] = cache diff --git a/build/lib/exo/inference/mlx/sharded_utils.py b/build/lib/exo/inference/mlx/sharded_utils.py deleted file mode 100644 index 7fa38eaa6..000000000 --- a/build/lib/exo/inference/mlx/sharded_utils.py +++ /dev/null @@ -1,207 +0,0 @@ -# Adapted from https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/utils.py - -import glob -import importlib -import json -import logging -import asyncio -import aiohttp -from functools import partial -from pathlib import Path -from typing import Optional, Tuple, Union, List, Callable -from PIL import Image -from io import BytesIO -import base64 - -import mlx.core as mx -import mlx.nn as nn -from transformers import AutoProcessor - -from mlx_lm.tokenizer_utils import load_tokenizer, TokenizerWrapper -from mlx_lm.tuner.utils import apply_lora_layers - -from exo import DEBUG -from ..shard import Shard - - -class ModelNotFoundError(Exception): - def __init__(self, message): - self.message = message - super().__init__(self.message) - - -MODEL_REMAPPING = { - "mistral": "llama", # mistral is compatible with llama - "phi-msft": "phixtral", -} - - -def _get_classes(config: dict): - """ - Retrieve the model and model args classes based on the configuration. 
- - Args: - config (dict): The model configuration. - - Returns: - A tuple containing the Model class and the ModelArgs class. - """ - model_type = config["model_type"] - model_type = MODEL_REMAPPING.get(model_type, model_type) - try: - arch = importlib.import_module(f"exo.inference.mlx.models.{model_type}") - except ImportError: - msg = f"Model type {model_type} not supported." - logging.error(msg) - raise ValueError(msg) - - return arch.Model, arch.ModelArgs - - -def load_config(model_path: Path) -> dict: - try: - with open(model_path/"config.json", "r") as f: - config = json.load(f) - except FileNotFoundError: - logging.error(f"Config file not found in {model_path}") - raise - return config - - -def load_model_shard( - model_path: Path, - shard: Shard, - lazy: bool = False, - model_config: dict = {}, -) -> nn.Module: - """ - Load and initialize the model from a given path. - - Args: - model_path (Path): The path to load the model from. - lazy (bool): If False eval the model parameters to make sure they are - loaded in memory before returning, otherwise they will be loaded - when needed. Default: ``False`` - model_config(dict, optional): Configuration parameters for the model. - Defaults to an empty dictionary. - - Returns: - nn.Module: The loaded and initialized model. - - Raises: - FileNotFoundError: If the weight files (.safetensors) are not found. - ValueError: If the model class or args class are not found or cannot be instantiated. - """ - config = load_config(model_path) - config.update(model_config) - - # TODO hack - config["shard"] = { - "model_id": model_path.name, - "start_layer": shard.start_layer, - "end_layer": shard.end_layer, - "n_layers": shard.n_layers, - } - - weight_files = glob.glob(str(model_path/"model*.safetensors")) - - if not weight_files: - # Try weight for back-compat - weight_files = glob.glob(str(model_path/"weight*.safetensors")) - - if not weight_files: - logging.error(f"No safetensors found in {model_path}") - raise FileNotFoundError(f"No safetensors found in {model_path}") - - weights = {} - for wf in sorted(weight_files): - if DEBUG >= 8: - layer_nums = set() - for k in mx.load(wf): - if k.startswith("model.layers."): - layer_num = int(k.split(".")[2]) - layer_nums.add(layer_num) - if k.startswith("language_model.model.layers."): - layer_num = int(k.split(".")[3]) - layer_nums.add(layer_num) - print(f"\"{wf.split('/')[-1]}\": {sorted(layer_nums)},") - - weights.update(mx.load(wf)) - - model_class, model_args_class = _get_classes(config=config) - - model_args = model_args_class.from_dict(config) - model = model_class(model_args) - - if hasattr(model, "sanitize"): - weights = model.sanitize(weights) - - if (quantization := config.get("quantization", None)) is not None: - # Handle legacy models which may not have everything quantized - def class_predicate(p, m): - if not hasattr(m, "to_quantized"): - return False - return f"{p}.scales" in weights - - nn.quantize( - model, - **quantization, - class_predicate=class_predicate, - ) - - model.load_weights(list(weights.items()), strict=True) - - if not lazy: - mx.eval(model.parameters()) - - model.eval() - return model - - -async def load_shard( - model_path: str, - shard: Shard, - tokenizer_config={}, - model_config={}, - adapter_path: Optional[str] = None, - lazy: bool = False, -) -> Tuple[nn.Module, TokenizerWrapper]: - model = load_model_shard(model_path, shard, lazy, model_config) - if adapter_path is not None: - model = apply_lora_layers(model, adapter_path) - model.eval() - - # TODO: figure out a generic 
solution - if model.model_type == "llava": - processor = AutoProcessor.from_pretrained(model_path) - processor.eos_token_id = processor.tokenizer.eos_token_id - processor.encode = processor.tokenizer.encode - return model, processor - else: - tokenizer = load_tokenizer(model_path, tokenizer_config) - return model, tokenizer - - -async def get_image_from_str(_image_str: str): - image_str = _image_str.strip() - - if image_str.startswith("http"): - async with aiohttp.ClientSession() as session: - async with session.get(image_str, timeout=10) as response: - content = await response.read() - return Image.open(BytesIO(content)).convert("RGB") - elif image_str.startswith("data:image/"): - # Extract the image format and base64 data - format_prefix, base64_data = image_str.split(";base64,") - image_format = format_prefix.split("/")[1].lower() - if DEBUG >= 2: print(f"{image_str=} {image_format=}") - imgdata = base64.b64decode(base64_data) - img = Image.open(BytesIO(imgdata)) - - # Convert to RGB if not already - if img.mode != "RGB": - img = img.convert("RGB") - - return img - else: - raise ValueError("Invalid image_str format. Must be a URL or a base64 encoded image.") diff --git a/build/lib/exo/inference/mlx/test_sharded_llama.py b/build/lib/exo/inference/mlx/test_sharded_llama.py deleted file mode 100644 index 1c48b936c..000000000 --- a/build/lib/exo/inference/mlx/test_sharded_llama.py +++ /dev/null @@ -1,40 +0,0 @@ -import mlx.core as mx -from exo.inference.mlx.sharded_model import StatefulShardedModel -from exo.inference.mlx.sharded_utils import load_shard -from exo.inference.shard import Shard - -# 79, 80 for Llama-3-70B -shard_full = Shard("llama", 0, 31, 32) -shard1 = Shard("llama", 0, 12, 32) -shard2 = Shard("llama", 13, 31, 32) - -full_model_shard, full_tokenizer = load_shard("mlx-community/Meta-Llama-3-8B-Instruct-4bit", shard=shard_full) -model_shard1, tokenizer1 = load_shard("mlx-community/Meta-Llama-3-8B-Instruct-4bit", shard=shard1) -model_shard2, tokenizer2 = load_shard("mlx-community/Meta-Llama-3-8B-Instruct-4bit", shard=shard2) - -full = StatefulShardedModel(shard_full, full_model_shard) -m1 = StatefulShardedModel(shard1, model_shard1) -m2 = StatefulShardedModel(shard2, model_shard2) - -prompt = "write a beautiful haiku about a utopia where people own their AI with edge intelligence:" -prompt_tokens = mx.array(full_tokenizer.encode(prompt)) -max_tokens = 50 - -resp = prompt_tokens -full_generated_tokens = [] -for _ in range(max_tokens): - resp = full.step(resp) - full_generated_tokens.append(resp.item()) - -print("full response: ", full_tokenizer.decode(full_generated_tokens)) - -sharded_generated_tokens = [] -sharded_resp = prompt_tokens -for _ in range(max_tokens): - resp1 = m1.step(sharded_resp) - sharded_resp = m2.step(resp1) - sharded_generated_tokens.append(sharded_resp.item()) - -print("sharded response: ", tokenizer1.decode(sharded_generated_tokens)) - -assert tokenizer1.decode(full_generated_tokens) == tokenizer1.decode(sharded_generated_tokens) diff --git a/build/lib/exo/inference/mlx/test_sharded_llava.py b/build/lib/exo/inference/mlx/test_sharded_llava.py deleted file mode 100644 index 958a5acc8..000000000 --- a/build/lib/exo/inference/mlx/test_sharded_llava.py +++ /dev/null @@ -1,64 +0,0 @@ -import codecs -import asyncio -import requests -from PIL import Image -from io import BytesIO - -import mlx.core as mx -from mlx_lm.models.base import KVCache - -from exo.inference.mlx.sharded_model import StatefulShardedModel -from exo.inference.mlx.sharded_utils import 
load_shard -from exo.inference.shard import Shard - -shard_full = Shard("llava", 0, 31, 32) -shard1 = Shard("llava", 0, 12, 32) -shard2 = Shard("llava", 13, 31, 32) - -model_path = "llava-hf/llava-1.5-7b-hf" - -full_model_shard, full_processor = asyncio.run(load_shard(model_path, shard=shard_full)) -model_shard1, processor1 = asyncio.run(load_shard(model_path, shard=shard1)) -model_shard2, processor2 = asyncio.run(load_shard(model_path, shard=shard2)) - -full = StatefulShardedModel(shard_full, full_model_shard) -m1 = StatefulShardedModel(shard1, model_shard1) -m2 = StatefulShardedModel(shard2, model_shard2) - -PROMPT = "USER: \nWhat are these?\nASSISTANT:" -IMAGE_FILE = "http://images.cocodataset.org/val2017/000000039769.jpg" -response = requests.get(IMAGE_FILE) -img = Image.open(BytesIO(response.content)) -prompt = codecs.decode(PROMPT, "unicode_escape") -inputs = full_processor(prompt, img, return_tensors="np") -pixel_values = mx.array(inputs["pixel_values"]) -input_ids = mx.array(inputs["input_ids"]) - -print(prompt) -y = full.step("full", input_ids, pixel_values, temp=0) -full_generated_tokens = [y.item()] - -for _ in range(13): - y = full.step("full", y, temp=0) - full_generated_tokens.append(y.item()) - -full_response = full_processor.tokenizer.decode(full_generated_tokens) -print("full response:", full_response) - -inputs = processor1(prompt, img, return_tensors="np") -pixel_values = mx.array(inputs["pixel_values"]) -input_ids = mx.array(inputs["input_ids"]) - -y = m1.step("shard", input_ids, pixel_values, temp=0) -y = m2.step("shard", y, temp=0) -full_generated_tokens = [y.item()] - -for _ in range(13): - y = m1.step("shard", y, temp=0) - y = m2.step("shard", y, temp=0) - full_generated_tokens.append(y.item()) - -sharded_response = processor2.tokenizer.decode(full_generated_tokens) -print("sharded response:", sharded_response) - -assert full_response == sharded_response diff --git a/build/lib/exo/inference/mlx/test_sharded_model.py b/build/lib/exo/inference/mlx/test_sharded_model.py deleted file mode 100644 index c9743d078..000000000 --- a/build/lib/exo/inference/mlx/test_sharded_model.py +++ /dev/null @@ -1,52 +0,0 @@ -from exo.inference.shard import Shard -import mlx.core as mx -import mlx.nn as nn -from typing import Optional -import numpy as np - - -class DummyModel(nn.Module): - def __init__(self, shard: Optional[Shard] = None): - self.shard = shard - self.layers = [ - nn.Linear(8, 128), - nn.Linear(128, 128), - nn.Linear(128, 128), - nn.Linear(128, 128), - nn.Linear(128, 8), - ] - - self.n_kv_heads = 4 - self.head_dim = 4 - - def __call__(self, x, cache=None): - if self.shard: - for layer in self.layers[self.shard.start_layer:self.shard.end_layer + 1]: - x = layer(x) - if self.shard.is_last_layer(): - x = x.reshape((1, 2, 4)) - else: - for layer in self.layers: - x = layer(x) - x = x.reshape((1, 2, 4)) - - return x - - -model = DummyModel() -model.save_weights("./test_weights.npz") -n_layers = 5 -shard1 = Shard("test", 0, n_layers // 2, n_layers) -sharded_model1 = DummyModel(shard1) -shard2 = Shard("test", n_layers//2 + 1, n_layers - 1, n_layers) -sharded_model2 = DummyModel(shard2) - -model.load_weights("./test_weights.npz") -sharded_model1.load_weights("./test_weights.npz") -sharded_model2.load_weights("./test_weights.npz") - -fullresp = model(mx.array([1, 2, 3, 4, 5, 6, 7, 8])) -resp1 = sharded_model1(mx.array([1, 2, 3, 4, 5, 6, 7, 8])) -resp2 = sharded_model2(resp1) - -assert np.all(np.array(fullresp) == np.array(resp2)) diff --git 
a/build/lib/exo/inference/pytorch/__init__.py b/build/lib/exo/inference/pytorch/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/build/lib/exo/inference/pytorch/helpers.py b/build/lib/exo/inference/pytorch/helpers.py deleted file mode 100644 index addea2db7..000000000 --- a/build/lib/exo/inference/pytorch/helpers.py +++ /dev/null @@ -1,24 +0,0 @@ -# Helper functions for pytorch inference -# Some code coming from tinygrad but written towards pytorch - -import asyncio -import aiohttp -from tqdm import tqdm -from pathlib import Path -from typing import List - -async def fetch_file_async(session, url: str, output_path: Path): - async with session.get(url) as response: - response.raise_for_status() - with open(output_path, 'wb') as f: - async for chunk in response.content.iter_chunked(8192): - f.write(chunk) - -async def download_files(urls: List[str], output_paths: List[Path]): - async with aiohttp.ClientSession() as session: - tasks = [] - for url, output_path in zip(urls, output_paths): - tasks.append(fetch_file_async(session, url, output_path)) - - for f in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Downloading files"): - await f diff --git a/build/lib/exo/inference/pytorch/inference.py b/build/lib/exo/inference/pytorch/inference.py deleted file mode 100644 index ba834eb67..000000000 --- a/build/lib/exo/inference/pytorch/inference.py +++ /dev/null @@ -1,211 +0,0 @@ -# experimental, based off of tinygrad/inference.py -import numpy as np -import torch -import numpy as np -import json -from typing import Optional, Tuple -from exo.inference.shard import Shard -from exo.inference.inference_engine import InferenceEngine -from exo.inference.pytorch.model.hf import ShardedHuggingFaceModel -from exo.api.chatgpt_api import resolve_tokenizer -from exo.helpers import DEBUG -from transformers import DynamicCache -from accelerate import disk_offload - -class PyTorchDynamicShardInferenceEngine(InferenceEngine): - """ - PyTorch Dynamic Shard Inference Engine for performing model inference with sharded models. - """ - - def __init__(self, shard): - """ - Initialize the inference engine. - - Args: - debug (bool): If True, enables debug logging. Defaults to False. 
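            shard: The model shard this engine is constructed with (stored on self.shard).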
- """ - self.shard = shard - self.model = None - self.tokenizer = None - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - async def infer_prompt( - self, - request_id: str, - shard: Optional[Shard] = None, - prompt: str = "", - image_str: Optional[str] = None, - inference_state: Optional[str] = None - ) -> Tuple[np.ndarray, str, bool]: - - await self.ensure_shard(shard) - - # need to make this so inference_state is not a string - # cant use it with dynamic cache - - tokens = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device) - tokens = self.model.embed_tokens(tokens) - current_kvs = None - - if DEBUG >= 4: - print("infer_prompt called") - print(f"tokens: {tokens}\n") - print(f"layer_count: {self.shard.get_layer_count()}") - print(f"is_first_layer: {self.shard.is_first_layer()}") - print(f"is_last_layer: {self.shard.is_last_layer()}") - - # convert inference_state or cache from json to DynamicCache - past_kv = DynamicCache() - if inference_state != None: - cache_dict = json.loads(inference_state) - past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] - past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] - - output_data, current_kvs = self.model.forward( - tokens, - past_kv - ) - - is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] - - if DEBUG >= 4: - print(f"output_data: {output_data}\n") - print(f"output_data.size {output_data.size}\n") - - print(f"finished: {is_finished}") - print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") - print(f"output_data[-1] {output_data[-1]}") - - if output_data.size == 1: - print(f"size 1 output_data.item() {output_data.item()}") - print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") - - cache_dict = { - 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], - 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] - } - - return ( - output_data, - json.dumps(cache_dict), - is_finished - ) - - async def infer_tensor( - self, - request_id: str, - shard: Shard, - input_data: np.ndarray, - inference_state: Optional[str] = None - ) -> Tuple[np.ndarray, str, bool]: - - await self.ensure_shard(shard) - - current_kvs = None - - if input_data.size == 1: - in_tensor = torch.from_numpy( - input_data, - ).unsqueeze(0).long().to(self.device) - else: - in_tensor = torch.from_numpy( - input_data - ).long().to(self.device) - - in_tensor = self.model.embed_tokens(in_tensor) - - if DEBUG >= 4: - print("infer_tensor called") - print(f"input_data: {input_data}") - print(f"input_data.size: {input_data.size}") - print(f"input_tensor: {in_tensor}\n") - print(f"shard: {self.shard}") - print(f"layer_count: {self.shard.get_layer_count()}") - print(f"is_first_layer: {self.shard.is_first_layer()}") - print(f"is_last_layer: {self.shard.is_last_layer()}") - - # convert inference_state or cache from json to DynamicCache - past_kv = DynamicCache() - if inference_state != None: - try: - cache_dict = json.loads(inference_state) - past_kv.key_cache = [torch.tensor(data).to(self.device) for data in cache_dict['key_cache']] - past_kv.value_cache = [torch.tensor(data).to(self.device) for data in cache_dict['value_cache']] - - if DEBUG >= 4: - print("Loaded past_kv from JSON") - print(f"past_kv: {past_kv}") - print(f"past_kv.key_cache len: {len(past_kv.key_cache)}") - print(f"past_kv.value_cache len: 
{len(past_kv.value_cache)}") - except json.JSONDecodeError: - print(f"ERROR DECODING INFERENCE STATE") - - output_data, current_kvs = self.model.forward( - in_tensor, - past_kv - ) - - is_finished = output_data.size == 1 and output_data.item() in [self.tokenizer.eos_token_id] - - if DEBUG >= 4: - print(f"in_tensor: {in_tensor}\n") - print(f"output_data: {output_data}\n") - print(f"output_data.size {output_data.size}\n") - print(f"finished: {is_finished}") - print(f"self.tokenizer.eos_token_id {self.tokenizer.eos_token_id}") - print(f"output_data[-1] {output_data[-1]}") - - if output_data.size == 1: - print(f"size 1 output_data.item() {output_data.item()}") - print(f"output_data.item() in [self.tokenizer.eos_token_id]: {output_data.item() in [self.tokenizer.eos_token_id]}") - - - cache_dict = { - 'key_cache': [tensor.tolist() for tensor in current_kvs.key_cache], - 'value_cache': [tensor.tolist() for tensor in current_kvs.value_cache] - } - - return ( - output_data, - json.dumps(cache_dict), - is_finished - ) - - async def ensure_shard(self, shard: Optional[Shard]): - """ - Ensure the model shard is loaded and ready for inference. - - Args: - shard (Optional[Shard]): Shard information for the model. - """ - # if self.shard == shard: - # return - - if DEBUG >= 4: - print(f"Loading new shard: {shard}") - - if self.model: - if DEBUG >= 2: - print(f"\nCLEARING MODEL {shard.model_id}\n") - print(f"before allocated: {torch.cuda.memory_allocated()}") - print(f"before reserved: {torch.cuda.memory_reserved()}") - - # delete model and free up memory to reload - # self.model.cuda() - # disk_offload(model=self.model, offload_dir="./.offload") - import gc - - del self.model - gc.collect() - torch.cuda.empty_cache() - - if DEBUG >= 2: - print(f"after allocated: {torch.cuda.memory_allocated()}") - print(f"after reserved: {torch.cuda.memory_reserved()}") - - self.shard = shard - self.tokenizer = await resolve_tokenizer(shard.model_id) - self.model = ShardedHuggingFaceModel(shard, self.tokenizer) - - if DEBUG >= 4: - print(f"Shard loaded successfully: {shard}") \ No newline at end of file diff --git a/build/lib/exo/inference/pytorch/model/__init__.py b/build/lib/exo/inference/pytorch/model/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/build/lib/exo/inference/pytorch/model/hf.py b/build/lib/exo/inference/pytorch/model/hf.py deleted file mode 100644 index aa2873c56..000000000 --- a/build/lib/exo/inference/pytorch/model/hf.py +++ /dev/null @@ -1,155 +0,0 @@ -import torch -import numpy as np -from transformers import AutoModelForCausalLM, DynamicCache, Cache -from exo.inference.shard import Shard -from exo.helpers import DEBUG -from typing import Tuple, Optional, Union, List -from exo.inference.pytorch.model.utils import sample_logits - -TOP_P = 0.75 #0.95 -TOP_K = 20 -TEMP = 0.8 - -class ShardedHuggingFaceModel(torch.nn.Module): - def __init__(self, shard: Shard, tokenizer: any): - super(ShardedHuggingFaceModel, self).__init__() - - if torch.cuda.is_available(): - self.device = torch.device("cuda") - else: - self.device = torch.device("cpu") - - self.shard = shard - self.tokenizer = tokenizer - - # Load the model - try: - self.llm_model = AutoModelForCausalLM.from_pretrained( - shard.model_id, - torch_dtype=torch.float32, - device_map="auto", - # offload_buffers=True - ) - - # disk_offload(model=self.llm_model, offload_dir="./.offload") - - self.base_model = self.llm_model.model - except Exception as err: - print(f"Error loading model: {err}") - raise - - if DEBUG >= 2: - 
print(f"\nShardedHuggingFaceModel init with shard {shard}") - print(f"self.llm_model: {self.llm_model}") - print(f"self.base_model: {self.base_model}") - - if DEBUG >= 2: - print(f"full_model.model layer: {len(self.base_model.layers)}") - - # Embeddings and final layer norm - # used for doing what forward LlamaModel does in transformers - self.norm = self.base_model.norm - self.lm_head = self.llm_model.lm_head - self.embed_tokens = self.base_model.embed_tokens - - def forward( - self, - input_ids: torch.tensor, - past_kvs: Optional[Union[Cache, List[torch.FloatTensor]]] = None, - ) -> Tuple[np.ndarray, any]: - """ - Forward through layers using the base model - - Args: - input_ids: tensor input - past_kvs: past key value stores for cache - use_cache: use cache - - Returns: - hidden_states: numpy of states between layers - or logits: numpy of normalization and linearization of last hidden state - past_kvs: DynamicCache of past key values if use_cache is true - - Ref: - https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/qwen2/modeling_qwen2.py#L804 - https://github.com/huggingface/transformers/blob/v4.44.2/src/transformers/models/llama/modeling_llama.py#L887 - """ - if DEBUG >= 4: - print("forward called") - print(f"input_ids: {input_ids}\n") - print(f"layer_count: {self.shard.get_layer_count()}") - print(f"is_first_layer: {self.shard.is_first_layer()}") - print(f"is_last_layer: {self.shard.is_last_layer()}") - - past_kvs = DynamicCache.from_legacy_cache(past_kvs) - past_seen_tokens = past_kvs.get_seq_length() if past_kvs is not None else 0 - - cache_position = torch.arange( - past_seen_tokens, - past_seen_tokens + input_ids.shape[1], - device=input_ids.device - ).to(self.device) - - position_ids = cache_position.unsqueeze(0).to(self.device) - - try: - position_embeddings = self.base_model.rotary_emb( - input_ids, - position_ids - ) - except Exception as err: - print(f"rotary_emb not found in base_model") - position_embeddings = None - - # progress through layers - for i in range(self.shard.start_layer, self.shard.end_layer + 1): - decoder_layer = self.base_model.layers[i] - - if DEBUG >= 4: - print("Going through layer") - print(f"{decoder_layer}") - print("input_ids") - print(f"{input_ids}") - - layer_outputs = decoder_layer( - input_ids, - position_ids=position_ids if not position_embeddings else None, - position_embeddings=position_embeddings, - past_key_value=past_kvs, - use_cache=True, - cache_position=cache_position, - ) - - hidden_states = layer_outputs[0] - next_kvs = layer_outputs[1] - - if DEBUG >= 3: - print(f"layer_outputs {layer_outputs}") - - if self.shard.is_last_layer(): - hs_norm = self.norm(hidden_states) - hs_lm_head = self.llm_model.lm_head(hs_norm).float() - - # Use the sampling function with default settings - with torch.no_grad(): - output_token = sample_logits( - hs_lm_head[:, -1, :], - TEMP, - TOP_P, - TOP_K - ).numpy(force=True).flatten() - - if DEBUG >= 2: - print(f"hs_norm: {hs_norm}") - print(f"hs_lm_head: {hs_lm_head}") - print(f"output_token: {output_token}") - - return (output_token, next_kvs) - - with torch.no_grad(): - out_hidden_states = hidden_states.numpy(force=True) - - return ( - out_hidden_states, - next_kvs - ) \ No newline at end of file diff --git a/build/lib/exo/inference/pytorch/model/utils.py b/build/lib/exo/inference/pytorch/model/utils.py deleted file mode 100644 index df84b3977..000000000 --- a/build/lib/exo/inference/pytorch/model/utils.py +++ /dev/null @@ -1,83 +0,0 @@ -import torch -from torch.nn import 
functional as F - -def top_p_sampling(scaled_logits: torch.Tensor, top_p: float) -> torch.Tensor: - """ - Apply top-p (nucleus) sampling to logits. - - Args: - scaled_logits (torch.Tensor): The scaled logits from the model's output. - top_p (float): The cumulative probability threshold for top-p filtering. - temp (float): Temperature parameter for softmax distribution reshaping. - - Returns: - torch.Tensor: Token selected based on the top-p criterion. - - Ref: - https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/sample_utils.py#L67C1-L97C17 - """ - scaled_logits = torch.where(torch.isnan(scaled_logits), torch.zeros_like(scaled_logits), scaled_logits) - scaled_logits = torch.where(torch.isinf(scaled_logits), torch.full_like(scaled_logits, 1e6), scaled_logits) - - probs = torch.softmax(scaled_logits, dim=-1) - - sorted_probs, sorted_indices = torch.sort( - probs, - descending=True, - dim=-1 - ) - - cumulative_probs = torch.cumsum(sorted_probs, dim=-1) - mask = cumulative_probs > top_p - - top_probs = torch.where(mask, torch.zeros_like(sorted_probs), sorted_probs) - sum_probs = top_probs.sum(dim=-1, keepdim=True) - top_probs = torch.where(sum_probs > 0, top_probs / sum_probs, torch.ones_like(top_probs) / top_probs.size(-1)) - - if torch.isnan(top_probs).any() or torch.isinf(top_probs).any(): - print("Warning: Top probabilities contain NaN or Inf values after normalization") - top_probs = torch.where(torch.isnan(top_probs) | torch.isinf(top_probs), - 1.0 / top_probs.size(-1), - top_probs) - - sorted_token = torch.multinomial(top_probs, num_samples=1) - - token = sorted_indices.gather(-1, sorted_token) - - return token.squeeze(-1) - -def sample_logits(logits, temp, top_p, top_k): - """ - Sample tokens from logits using temperature, top-k, and top-p (nucleus) sampling. - - Args: - logits (torch.Tensor): The logits distribution to sample from. - temp (float): temp for scaling logits. - top_p (float): The cumulative probability threshold for nucleus sampling. - - Returns: - torch.Tensor: The selected token index. - """ - - # Ensure logits are float - logits = logits.float() - - # If temp is very low, just use argmax - if temp == 0: - return logits.argmax(dim=-1) - - scaled_logits = logits/temp - - # top k - if top_k > 0: - top_values, top_indices = torch.topk(scaled_logits, top_k, dim=-1) - scaled_logits = torch.zeros_like(logits).scatter_(-1, top_indices, top_values) - - # Top-p sampling - if 0 < top_p < 1.0: - return top_p_sampling(scaled_logits, top_p) - else: - # random distribution selection - probs = torch.softmax(scaled_logits, dim=-1) - rand_sample = torch.distributions.Categorical(probs) - return rand_sample.sample().squeeze() \ No newline at end of file diff --git a/build/lib/exo/inference/pytorch/test_inference_engine.py b/build/lib/exo/inference/pytorch/test_inference_engine.py deleted file mode 100644 index bacf53bcc..000000000 --- a/build/lib/exo/inference/pytorch/test_inference_engine.py +++ /dev/null @@ -1,141 +0,0 @@ - -import asyncio -from exo.inference.shard import Shard -from exo.inference.pytorch.inference import PyTorchDynamicShardInferenceEngine -from exo.download.hf.hf_shard_download import HFShardDownloader -from exo.inference.inference_engine import InferenceEngine -from exo.inference.shard import Shard -from exo.helpers import DEBUG -import os -import numpy as np - -async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str, n_layers: int): - # prompt = "Why is the sky blue?" 
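    # Illustrative sketch only (assuming a 22-layer model, matching the TinyLlama case
    # enabled below) of how this test splits layers across the two engines: with
    # pp = n_layers // 2, the first shard covers layers 0..pp and the second covers
    # pp+1..n_layers-1, so together they are continuous and cover every layer once.
    #   n_layers = 22
    #   pp = n_layers // 2                          # 11
    #   shard_1 = list(range(0, pp + 1))            # layers 0..11
    #   shard_2 = list(range(pp + 1, n_layers))     # layers 12..21
    #   assert shard_1 + shard_2 == list(range(n_layers))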
- prompt = "In a single word only, what is the last name of the current president of the USA?" - - # shard = Shard( - # model_id=model_id, - # start_layer=0, - # end_layer=n_layers-1, - # n_layers=n_layers - # ) - - # resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt( - # "A", - # shard=shard, - # prompt=prompt - # ) - - # print(f"resp_full: {resp_full}") - - # next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( - # "A", - # shard=shard, - # input_data=resp_full, - # inference_state=inference_state_full, - # ) - - # print(f"next_resp_full: {next_resp_full}") - - pp = int(n_layers/2) - - resp_shard = Shard( - model_id=model_id, - start_layer=0, - end_layer=pp, - n_layers=n_layers - ) - - resp_shard2 = Shard( - model_id=model_id, - start_layer=pp + 1, - end_layer=n_layers-1, - n_layers=n_layers - ) - - resp1, inference_state_1, _ = await inference_engine_1.infer_prompt( - "B", - shard=resp_shard, - prompt=prompt - ) - - resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( - "B", - shard=resp_shard2, - input_data=resp1, - inference_state=inference_state_1, - ) - - # resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( - # "B", - # shard=resp_shard, - # input_data=resp2, - # inference_state=inference_state_2, - # ) - - # resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( - # "B", - # shard=resp_shard2, - # input_data=resp3, - # inference_state=inference_state_3, - # ) - - assert np.array_equal(resp_full, resp2) - assert np.array_equal(next_resp_full, resp4) - -if __name__ == '__main__': - # try: - # print(f"\n\n -------- TEST QWEN2 -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "Qwen/Qwen2-0.5B-Instruct", - # 24 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! QWEN2 TEST FAILED \n{err}\n") - - # try: - # print(f"\n\n -------- TEST LLAMA3-1B-Base -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "andrijdavid/Llama3-1B-Base", - # 3 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! LLAMA3-1B-Base TEST FAILED \n{err}\n") - - # try: - # print(f"\n\n -------- TEST META LLAMA 3.1 8B -------- \n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "meta-llama/Meta-Llama-3.1-8B", - # 32 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! META LLAMA 3.1 8B TEST FAILED \n{err}\n") - - # try: - # print(f"\n\n ------- TEST Chickaboo/ChickaQ-Large -----\n\n") - # asyncio.run(test_inference_engine( - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - # "Chickaboo/ChickaQ-Large", - # 24 - # )) - # except Exception as err: - # print(f"\n\n !!!!!!!!!!! Chickaboo/ChickaQ-Large TEST FAILED \n{err}\n") - - try: - print(f"\n\n --------- TEST ambrosfitz/TinyLlama-1.1B-Chat-yawp -------\n\n") - asyncio.run(test_inference_engine( - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - PyTorchDynamicShardInferenceEngine(HFShardDownloader()), - "ambrosfitz/TinyLlama-1.1B-Chat-yawp", - 22 - )) - except Exception as err: - print(f"\n\n !!!!!!!!!!! 
ambrosfitz/TinyLlama-1.1B-Chat-yawp TEST FAILED \n{err}\n") - diff --git a/build/lib/exo/inference/shard.py b/build/lib/exo/inference/shard.py deleted file mode 100644 index 21b662f63..000000000 --- a/build/lib/exo/inference/shard.py +++ /dev/null @@ -1,39 +0,0 @@ -from dataclasses import dataclass, field - - -@dataclass(frozen=True) -class Shard: - model_id: str - start_layer: int - end_layer: int - n_layers: int - - def __hash__(self): - return hash((self.model_id, self.start_layer, self.end_layer, self.n_layers)) - - def is_first_layer(self) -> bool: - return self.start_layer == 0 - - def is_last_layer(self) -> bool: - return self.end_layer == self.n_layers - 1 - - def get_layer_count(self) -> int: - return self.end_layer - self.start_layer + 1 - - def to_dict(self) -> dict: - return { - "model_id": self.model_id, - "start_layer": self.start_layer, - "end_layer": self.end_layer, - "n_layers": self.n_layers, - } - - def from_dict(data: dict) -> 'Shard': - return Shard(**data) - - def overlaps(self, other: 'Shard') -> bool: - return shards_overlap(self, other) - - -def shards_overlap(shard1: Shard, shard2: Shard) -> bool: - return (shard1.model_id == shard2.model_id and max(shard1.start_layer, shard2.start_layer) <= min(shard1.end_layer, shard2.end_layer)) diff --git a/build/lib/exo/inference/test_inference_engine.py b/build/lib/exo/inference/test_inference_engine.py deleted file mode 100644 index e57c608d9..000000000 --- a/build/lib/exo/inference/test_inference_engine.py +++ /dev/null @@ -1,64 +0,0 @@ -from exo.inference.mlx.sharded_inference_engine import MLXDynamicShardInferenceEngine -from exo.download.hf.hf_shard_download import HFShardDownloader -from exo.inference.inference_engine import InferenceEngine -from exo.inference.shard import Shard -from exo.helpers import DEBUG -import os -import asyncio -import numpy as np - - -# An inference engine should work the same for any number of Shards, as long as the Shards are continuous. -async def test_inference_engine(inference_engine_1: InferenceEngine, inference_engine_2: InferenceEngine, model_id: str): - prompt = "In a single word only, what is the last name of the current president of the USA?" 
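    # Worked example of the Shard arithmetic this test relies on (32-layer model split at
    # pp = 15, as below): Shard(model_id, 0, 15, 32) is the first shard (start_layer == 0),
    # is not the last (end_layer != n_layers - 1), and holds end_layer - start_layer + 1 = 16
    # layers; Shard(model_id, 16, 31, 32) is the last shard and holds the remaining 16, so
    # the two shards are continuous and cover all 32 layers exactly once.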
- resp_full, inference_state_full, _ = await inference_engine_1.infer_prompt("A", shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), prompt=prompt) - next_resp_full, _next_inference_state_full, _ = await inference_engine_1.infer_tensor( - "A", - shard=Shard(model_id=model_id, start_layer=0, end_layer=31, n_layers=32), - input_data=resp_full, - inference_state=inference_state_full, - ) - - pp = 15 - resp1, inference_state_1, _ = await inference_engine_1.infer_prompt("B", shard=Shard(model_id=model_id, start_layer=0, end_layer=pp, n_layers=32), prompt=prompt) - resp2, inference_state_2, _ = await inference_engine_2.infer_tensor( - "B", - shard=Shard(model_id=model_id, start_layer=pp + 1, end_layer=31, n_layers=32), - input_data=resp1, - inference_state=inference_state_1, - ) - resp3, inference_state_3, _ = await inference_engine_1.infer_tensor( - "B", - shard=Shard(model_id=model_id, start_layer=0, end_layer=pp, n_layers=32), - input_data=resp2, - inference_state=inference_state_2, - ) - resp4, _inference_state_4, _ = await inference_engine_2.infer_tensor( - "B", - shard=Shard(model_id=model_id, start_layer=pp + 1, end_layer=31, n_layers=32), - input_data=resp3, - inference_state=inference_state_3, - ) - - assert np.array_equal(resp_full, resp2) - assert np.array_equal(next_resp_full, resp4) - - -asyncio.run(test_inference_engine( - MLXDynamicShardInferenceEngine(HFShardDownloader()), - MLXDynamicShardInferenceEngine(HFShardDownloader()), - "mlx-community/Meta-Llama-3-8B-Instruct-4bit", -)) - -if os.getenv("RUN_TINYGRAD", default="0") == "1": - import tinygrad - import os - from exo.inference.tinygrad.inference import TinygradDynamicShardInferenceEngine - tinygrad.helpers.DEBUG.value = int(os.getenv("TINYGRAD_DEBUG", default="0")) - asyncio.run( - test_inference_engine( - TinygradDynamicShardInferenceEngine(HFShardDownloader()), - TinygradDynamicShardInferenceEngine(HFShardDownloader()), - "TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", - ) - ) diff --git a/build/lib/exo/inference/tokenizers.py b/build/lib/exo/inference/tokenizers.py deleted file mode 100644 index 9accd9436..000000000 --- a/build/lib/exo/inference/tokenizers.py +++ /dev/null @@ -1,45 +0,0 @@ -import traceback -from aiofiles import os as aios -from transformers import AutoTokenizer, AutoProcessor -from exo.download.hf.hf_helpers import get_local_snapshot_dir -from exo.helpers import DEBUG - -async def resolve_tokenizer(model_id: str): - local_path = await get_local_snapshot_dir(model_id) - if DEBUG >= 2: print(f"Checking if local path exists to load tokenizer from local {local_path=}") - try: - if await aios.path.exists(local_path): - if DEBUG >= 2: print(f"Resolving tokenizer for {model_id=} from {local_path=}") - return await _resolve_tokenizer(local_path) - except: - if DEBUG >= 5: print(f"Local check for {local_path=} failed. 
Resolving tokenizer for {model_id=} normally...") - if DEBUG >= 5: traceback.print_exc() - return await _resolve_tokenizer(model_id) - -async def _resolve_tokenizer(model_id_or_local_path: str): - try: - if DEBUG >= 4: print(f"Trying AutoProcessor for {model_id_or_local_path}") - if "Mistral-Large" in str(model_id_or_local_path): - use_fast = True - else: - use_fast = False - processor = AutoProcessor.from_pretrained(model_id_or_local_path, use_fast=use_fast) - if not hasattr(processor, 'eos_token_id'): - processor.eos_token_id = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).eos_token_id - if not hasattr(processor, 'encode'): - processor.encode = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).encode - if not hasattr(processor, 'decode'): - processor.decode = getattr(processor, 'tokenizer', getattr(processor, '_tokenizer', processor)).decode - return processor - except Exception as e: - if DEBUG >= 4: print(f"Failed to load processor for {model_id_or_local_path}. Error: {e}") - if DEBUG >= 4: print(traceback.format_exc()) - - try: - if DEBUG >= 4: print(f"Trying AutoTokenizer for {model_id_or_local_path}") - return AutoTokenizer.from_pretrained(model_id_or_local_path) - except Exception as e: - if DEBUG >= 4: print(f"Failed to load tokenizer for {model_id_or_local_path}. Falling back to tinygrad tokenizer. Error: {e}") - if DEBUG >= 4: print(traceback.format_exc()) - - raise ValueError(f"[TODO] Unsupported model: {model_id_or_local_path}") diff --git a/build/lib/exo/models.py b/build/lib/exo/models.py deleted file mode 100644 index 137b881ce..000000000 --- a/build/lib/exo/models.py +++ /dev/null @@ -1,44 +0,0 @@ -from exo.inference.shard import Shard - -model_base_shards = { - ### llama - "llama-3.1-8b": { - "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), - "TinygradDynamicShardInferenceEngine": Shard(model_id="mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", start_layer=0, end_layer=0, n_layers=32), - "PyTorchDynamicShardInferenceEngine": Shard(model_id="meta-llama/Meta-Llama-3.1-8B", start_layer=0, end_layer=0, n_layers=32), - }, - "llama-3.1-70b": { - "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), - "TinygradDynamicShardInferenceEngine": Shard(model_id="NousResearch/Meta-Llama-3.1-70B", start_layer=0, end_layer=0, n_layers=80), - }, - "llama-3.1-405b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3.1-405B-4bit", start_layer=0, end_layer=0, n_layers=126),}, - "llama-3-8b": { - "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-8B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=32), - "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", start_layer=0, end_layer=0, n_layers=32), - }, - "llama-3-70b": { - "MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Meta-Llama-3-70B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80), - "TinygradDynamicShardInferenceEngine": Shard(model_id="TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", start_layer=0, end_layer=0, n_layers=80), - }, - "llama-3-2B-Base": { - "PyTorchDynamicShardInferenceEngine": Shard(model_id="andrijdavid/Llama3-2B-Base", start_layer=0, end_layer=0, n_layers=6), - }, - "llama-3-1B-Base": { - "PyTorchDynamicShardInferenceEngine": 
Shard(model_id="andrijdavid/Llama3-1B-Base", start_layer=0, end_layer=0, n_layers=3), - }, - "TinyLlama-1.1B-Chat-yaw": { - "PyTorchDynamicShardInferenceEngine": Shard(model_id="ambrosfitz/TinyLlama-1.1B-Chat-yawp", start_layer=0, end_layer=0, n_layers=22), - }, - ### mistral - "mistral-nemo": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Nemo-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=40),}, - "mistral-large": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Mistral-Large-Instruct-2407-4bit", start_layer=0, end_layer=0, n_layers=88),}, - ### deepseek v2 - "deepseek-coder-v2-lite": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", start_layer=0, end_layer=0, n_layers=27),}, - ### llava - "llava-1.5-7b-hf": {"MLXDynamicShardInferenceEngine": Shard(model_id="llava-hf/llava-1.5-7b-hf", start_layer=0, end_layer=0, n_layers=32),}, - ### qwen - "Qwen2-0.5B-Instruct": { - "PyTorchDynamicShardInferenceEngine": Shard(model_id="Qwen/Qwen2-0.5B-Instruct", start_layer=0, end_layer=0, n_layers=24), - }, - -} diff --git a/build/lib/exo/networking/__init__.py b/build/lib/exo/networking/__init__.py deleted file mode 100644 index 44a10a30e..000000000 --- a/build/lib/exo/networking/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .discovery import Discovery -from .peer_handle import PeerHandle -from .server import Server - -__all__ = ["Discovery", "PeerHandle", "Server"] diff --git a/build/lib/exo/networking/discovery.py b/build/lib/exo/networking/discovery.py deleted file mode 100644 index cdcbfabc1..000000000 --- a/build/lib/exo/networking/discovery.py +++ /dev/null @@ -1,17 +0,0 @@ -from abc import ABC, abstractmethod -from typing import List -from .peer_handle import PeerHandle - - -class Discovery(ABC): - @abstractmethod - async def start(self) -> None: - pass - - @abstractmethod - async def stop(self) -> None: - pass - - @abstractmethod - async def discover_peers(self, wait_for_peers: int = 0) -> List[PeerHandle]: - pass diff --git a/build/lib/exo/networking/grpc/__init__.py b/build/lib/exo/networking/grpc/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/build/lib/exo/networking/grpc/grpc_discovery.py b/build/lib/exo/networking/grpc/grpc_discovery.py deleted file mode 100644 index eb08a8385..000000000 --- a/build/lib/exo/networking/grpc/grpc_discovery.py +++ /dev/null @@ -1,188 +0,0 @@ -import asyncio -import json -import socket -import time -from typing import List, Dict, Callable, Tuple, Coroutine -from ..discovery import Discovery -from ..peer_handle import PeerHandle -from .grpc_peer_handle import GRPCPeerHandle -from exo.topology.device_capabilities import DeviceCapabilities, device_capabilities, UNKNOWN_DEVICE_CAPABILITIES -from exo import DEBUG_DISCOVERY - - -class ListenProtocol(asyncio.DatagramProtocol): - def __init__(self, on_message: Callable[[bytes, Tuple[str, int]], Coroutine]): - super().__init__() - self.on_message = on_message - self.loop = asyncio.get_event_loop() - - def connection_made(self, transport): - self.transport = transport - - def datagram_received(self, data, addr): - asyncio.create_task(self.on_message(data, addr)) - - -class GRPCDiscovery(Discovery): - def __init__( - self, - node_id: str, - node_port: int, - listen_port: int, - broadcast_port: int = None, - broadcast_interval: int = 1, - device_capabilities: DeviceCapabilities = UNKNOWN_DEVICE_CAPABILITIES, - discovery_timeout: int = 30, - ): - self.node_id = node_id - 
self.node_port = node_port - self.device_capabilities = device_capabilities - self.listen_port = listen_port - self.broadcast_port = broadcast_port if broadcast_port is not None else listen_port - self.broadcast_interval = broadcast_interval - self.known_peers: Dict[str, Tuple[GRPCPeerHandle, float, float]] = {} - self.broadcast_task = None - self.listen_task = None - self.cleanup_task = None - self.discovery_timeout = discovery_timeout - - async def start(self): - self.device_capabilities = device_capabilities() - self.broadcast_task = asyncio.create_task(self.task_broadcast_presence()) - self.listen_task = asyncio.create_task(self.task_listen_for_peers()) - self.cleanup_task = asyncio.create_task(self.task_cleanup_peers()) - - async def stop(self): - if self.broadcast_task: - self.broadcast_task.cancel() - if self.listen_task: - self.listen_task.cancel() - if self.cleanup_task: - self.cleanup_task.cancel() - if self.broadcast_task or self.listen_task or self.cleanup_task: - await asyncio.gather(self.broadcast_task, self.listen_task, self.cleanup_task, return_exceptions=True) - - async def discover_peers(self, wait_for_peers: int = 0) -> List[PeerHandle]: - if DEBUG_DISCOVERY >= 2: - print("Starting peer discovery process...") - - if wait_for_peers > 0: - while len(self.known_peers) == 0: - if DEBUG_DISCOVERY >= 2: - print("No peers discovered yet, retrying in 1 second...") - await asyncio.sleep(1) # Keep trying to find peers - if DEBUG_DISCOVERY >= 2: - print(f"Discovered first peer: {next(iter(self.known_peers.values()))}") - - grace_period = 5 # seconds - while True: - initial_peer_count = len(self.known_peers) - if DEBUG_DISCOVERY >= 2: - print(f"Current number of known peers: {initial_peer_count}. Waiting {grace_period} seconds to discover more...") - if len(self.known_peers) == initial_peer_count: - if wait_for_peers > 0: - await asyncio.sleep(grace_period) - if DEBUG_DISCOVERY >= 2: - print(f"Waiting additional {wait_for_peers} seconds for more peers.") - wait_for_peers = 0 - else: - if DEBUG_DISCOVERY >= 2: - print("No new peers discovered in the last grace period. 
Ending discovery process.") - break # No new peers found in the grace period, we are done - - return [peer_handle for peer_handle, _, _ in self.known_peers.values()] - - async def task_broadcast_presence(self): - transport, _ = await asyncio.get_event_loop().create_datagram_endpoint(lambda: asyncio.DatagramProtocol(), local_addr=("0.0.0.0", 0), family=socket.AF_INET) - sock = transport.get_extra_info("socket") - sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1) - - message = json.dumps({ - "type": "discovery", - "node_id": self.node_id, - "grpc_port": self.node_port, - "device_capabilities": self.device_capabilities.to_dict(), - }).encode("utf-8") - - while True: - try: - if DEBUG_DISCOVERY >= 3: - print(f"Broadcast presence: {message}") - transport.sendto(message, ("", self.broadcast_port)) - await asyncio.sleep(self.broadcast_interval) - except Exception as e: - print(f"Error in broadcast presence: {e}") - import traceback - - print(traceback.format_exc()) - - async def on_listen_message(self, data, addr): - if not data: - return - - decoded_data = data.decode("utf-8", errors="ignore") - - # Check if the decoded data starts with a valid JSON character - if not (decoded_data.strip() and decoded_data.strip()[0] in "{["): - if DEBUG_DISCOVERY >= 2: - print(f"Received invalid JSON data from {addr}: {decoded_data[:100]}") - return - - try: - decoder = json.JSONDecoder(strict=False) - message = decoder.decode(decoded_data) - except json.JSONDecodeError as e: - if DEBUG_DISCOVERY >= 2: - print(f"Error decoding JSON data from {addr}: {e}") - return - - if DEBUG_DISCOVERY >= 2: - print(f"received from peer {addr}: {message}") - - if message["type"] == "discovery" and message["node_id"] != self.node_id: - peer_id = message["node_id"] - peer_host = addr[0] - peer_port = message["grpc_port"] - device_capabilities = DeviceCapabilities(**message["device_capabilities"]) - if peer_id not in self.known_peers: - self.known_peers[peer_id] = ( - GRPCPeerHandle(peer_id, f"{peer_host}:{peer_port}", device_capabilities), - time.time(), - time.time(), - ) - if DEBUG_DISCOVERY >= 2: - print(f"Discovered new peer {peer_id} at {peer_host}:{peer_port}") - self.known_peers[peer_id] = (self.known_peers[peer_id][0], self.known_peers[peer_id][1], time.time()) - - async def task_listen_for_peers(self): - await asyncio.get_event_loop().create_datagram_endpoint(lambda: ListenProtocol(self.on_listen_message), local_addr=("0.0.0.0", self.listen_port)) - if DEBUG_DISCOVERY >= 2: - print("Started listen task") - - async def task_cleanup_peers(self): - while True: - try: - current_time = time.time() - peers_to_remove = [ - peer_handle.id() for peer_handle, connected_at, last_seen in self.known_peers.values() - if (not await peer_handle.is_connected() and current_time - connected_at > self.discovery_timeout) or current_time - last_seen > self.discovery_timeout - ] - if DEBUG_DISCOVERY >= 2: - print( - "Peer statuses:", - {peer_handle.id(): f"is_connected={await peer_handle.is_connected()}, {connected_at=}, {last_seen=}" - for peer_handle, connected_at, last_seen in self.known_peers.values()}, - ) - if DEBUG_DISCOVERY >= 2 and len(peers_to_remove) > 0: - print(f"Cleaning up peers: {peers_to_remove}") - for peer_id in peers_to_remove: - if peer_id in self.known_peers: - del self.known_peers[peer_id] - if DEBUG_DISCOVERY >= 2: - print(f"Removed peer {peer_id} due to inactivity.") - await asyncio.sleep(self.broadcast_interval) - except Exception as e: - print(f"Error in cleanup peers: {e}") - import traceback - - 
print(traceback.format_exc()) diff --git a/build/lib/exo/networking/grpc/grpc_peer_handle.py b/build/lib/exo/networking/grpc/grpc_peer_handle.py deleted file mode 100644 index 0629dc777..000000000 --- a/build/lib/exo/networking/grpc/grpc_peer_handle.py +++ /dev/null @@ -1,109 +0,0 @@ -import grpc -import numpy as np -from typing import Optional, Tuple, List - -# These would be generated from the .proto file -from . import node_service_pb2 -from . import node_service_pb2_grpc - -from ..peer_handle import PeerHandle -from exo.inference.shard import Shard -from exo.topology.topology import Topology -from exo.topology.device_capabilities import DeviceCapabilities - - -class GRPCPeerHandle(PeerHandle): - def __init__(self, _id: str, address: str, device_capabilities: DeviceCapabilities): - self._id = _id - self.address = address - self._device_capabilities = device_capabilities - self.channel = None - self.stub = None - - def id(self) -> str: - return self._id - - def device_capabilities(self) -> DeviceCapabilities: - return self._device_capabilities - - async def connect(self): - self.channel = grpc.aio.insecure_channel(self.address, options=[("grpc.max_metadata_size", 32*1024*1024)]) - self.stub = node_service_pb2_grpc.NodeServiceStub(self.channel) - - async def is_connected(self) -> bool: - return self.channel is not None and self.channel.get_state() == grpc.ChannelConnectivity.READY - - async def disconnect(self): - if self.channel: - await self.channel.close() - self.channel = None - self.stub = None - - async def send_prompt(self, shard: Shard, prompt: str, image_str: Optional[str] = None, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.array]: - request = node_service_pb2.PromptRequest( - prompt=prompt, - image_str=image_str, - shard=node_service_pb2.Shard( - model_id=shard.model_id, - start_layer=shard.start_layer, - end_layer=shard.end_layer, - n_layers=shard.n_layers, - ), - request_id=request_id, - inference_state=inference_state, - ) - response = await self.stub.SendPrompt(request) - - if not response.tensor_data or not response.shape or not response.dtype: - return None - - return np.frombuffer(response.tensor_data, dtype=np.dtype(response.dtype)).reshape(response.shape) - - async def send_tensor(self, shard: Shard, tensor: np.ndarray, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.array]: - request = node_service_pb2.TensorRequest( - shard=node_service_pb2.Shard( - model_id=shard.model_id, - start_layer=shard.start_layer, - end_layer=shard.end_layer, - n_layers=shard.n_layers, - ), - tensor=node_service_pb2.Tensor(tensor_data=tensor.tobytes(), shape=tensor.shape, dtype=str(tensor.dtype)), - request_id=request_id, - inference_state=inference_state, - ) - response = await self.stub.SendTensor(request) - - if not response.tensor_data or not response.shape or not response.dtype: - return None - - return np.frombuffer(response.tensor_data, dtype=np.dtype(response.dtype)).reshape(response.shape) - - async def get_inference_result(self, request_id: str) -> Tuple[Optional[np.ndarray], bool]: - request = node_service_pb2.GetInferenceResultRequest(request_id=request_id) - response = await self.stub.GetInferenceResult(request) - if response.tensor is None: - return None, response.is_finished - return ( - np.frombuffer(response.tensor.tensor_data, dtype=np.dtype(response.tensor.dtype)).reshape(response.tensor.shape), - response.is_finished, - ) - - async def collect_topology(self, visited: set[str], max_depth: 
int) -> Topology: - request = node_service_pb2.CollectTopologyRequest(visited=visited, max_depth=max_depth) - response = await self.stub.CollectTopology(request) - topology = Topology() - for node_id, capabilities in response.nodes.items(): - device_capabilities = DeviceCapabilities(model=capabilities.model, chip=capabilities.chip, memory=capabilities.memory, flops=capabilities.flops) - topology.update_node(node_id, device_capabilities) - for node_id, peers in response.peer_graph.items(): - for peer_id in peers.peer_ids: - topology.add_edge(node_id, peer_id) - return topology - - async def send_result(self, request_id: str, result: List[int], is_finished: bool) -> None: - request = node_service_pb2.SendResultRequest(request_id=request_id, result=result, is_finished=is_finished) - await self.stub.SendResult(request) - - async def send_opaque_status(self, request_id: str, status: str) -> None: - request = node_service_pb2.SendOpaqueStatusRequest(request_id=request_id, status=status) - await self.stub.SendOpaqueStatus(request) diff --git a/build/lib/exo/networking/grpc/grpc_server.py b/build/lib/exo/networking/grpc/grpc_server.py deleted file mode 100644 index 1481ef512..000000000 --- a/build/lib/exo/networking/grpc/grpc_server.py +++ /dev/null @@ -1,118 +0,0 @@ -import grpc -from concurrent import futures -import numpy as np -from asyncio import CancelledError - -from . import node_service_pb2 -from . import node_service_pb2_grpc -from exo import DEBUG -from exo.inference.shard import Shard -from exo.orchestration import Node - - -class GRPCServer(node_service_pb2_grpc.NodeServiceServicer): - def __init__(self, node: Node, host: str, port: int): - self.node = node - self.host = host - self.port = port - self.server = None - - async def start(self) -> None: - self.server = grpc.aio.server( - futures.ThreadPoolExecutor(max_workers=10), - options=[ - ("grpc.max_metadata_size", 32*1024*1024), - ("grpc.max_send_message_length", 128*1024*1024), - ("grpc.max_receive_message_length", 128*1024*1024), - ], - ) - node_service_pb2_grpc.add_NodeServiceServicer_to_server(self, self.server) - listen_addr = f"{self.host}:{self.port}" - self.server.add_insecure_port(listen_addr) - await self.server.start() - if DEBUG >= 1: print(f"Server started, listening on {listen_addr}") - - async def stop(self) -> None: - if self.server: - try: - await self.server.stop(grace=5) - await self.server.wait_for_termination() - except CancelledError: - pass - if DEBUG >= 1: print("Server stopped and all connections are closed") - - async def SendPrompt(self, request, context): - shard = Shard( - model_id=request.shard.model_id, - start_layer=request.shard.start_layer, - end_layer=request.shard.end_layer, - n_layers=request.shard.n_layers, - ) - prompt = request.prompt - image_str = request.image_str - request_id = request.request_id - result = await self.node.process_prompt(shard, prompt, image_str, request_id) - if DEBUG >= 5: print(f"SendPrompt {shard=} {prompt=} {image_str=} {request_id=} result: {result}") - tensor_data = result.tobytes() if result is not None else None - return node_service_pb2.Tensor(tensor_data=tensor_data, shape=result.shape, dtype=str(result.dtype)) if result is not None else node_service_pb2.Tensor() - - async def SendTensor(self, request, context): - shard = Shard( - model_id=request.shard.model_id, - start_layer=request.shard.start_layer, - end_layer=request.shard.end_layer, - n_layers=request.shard.n_layers, - ) - tensor = np.frombuffer(request.tensor.tensor_data, 
dtype=np.dtype(request.tensor.dtype)).reshape(request.tensor.shape) - request_id = request.request_id - inference_state = request.inference_state - - result = await self.node.process_tensor(shard, tensor, request_id, inference_state) - if DEBUG >= 5: print(f"SendTensor tensor {shard=} {tensor=} {request_id=} result: {result}") - tensor_data = result.tobytes() if result is not None else None - return node_service_pb2.Tensor(tensor_data=tensor_data, shape=result.shape, dtype=str(result.dtype)) if result is not None else node_service_pb2.Tensor() - - async def GetInferenceResult(self, request, context): - request_id = request.request_id - result = await self.node.get_inference_result(request_id) - if DEBUG >= 5: print(f"GetInferenceResult {request_id=}: {result}") - tensor_data = result[0].tobytes() if result[0] is not None else None - return ( - node_service_pb2.InferenceResult( - tensor=node_service_pb2.Tensor(tensor_data=tensor_data, shape=result[0].shape, dtype=str(result[0].dtype)), - is_finished=result[1], - ) if result[0] is not None else node_service_pb2.InferenceResult(is_finished=result[1]) - ) - - async def CollectTopology(self, request, context): - max_depth = request.max_depth - visited = set(request.visited) - topology = await self.node.collect_topology(visited, max_depth) - nodes = { - node_id: - node_service_pb2.DeviceCapabilities( - model=cap.model, - chip=cap.chip, - memory=cap.memory, - flops=node_service_pb2.DeviceFlops(fp32=cap.flops.fp32, fp16=cap.flops.fp16, int8=cap.flops.int8), - ) - for node_id, cap in topology.nodes.items() - } - peer_graph = {node_id: node_service_pb2.Peers(peer_ids=peers) for node_id, peers in topology.peer_graph.items()} - if DEBUG >= 5: print(f"CollectTopology {max_depth=} {visited=} {nodes=} {peer_graph=}") - return node_service_pb2.Topology(nodes=nodes, peer_graph=peer_graph) - - async def SendResult(self, request, context): - request_id = request.request_id - result = request.result - is_finished = request.is_finished - if DEBUG >= 5: print(f"Received SendResult request: {request_id=} {result=} {is_finished=}") - self.node.on_token.trigger_all(request_id, result, is_finished) - return node_service_pb2.Empty() - - async def SendOpaqueStatus(self, request, context): - request_id = request.request_id - status = request.status - if DEBUG >= 5: print(f"Received SendOpaqueStatus request: {request_id=} {status=}") - self.node.on_opaque_status.trigger_all(request_id, status) - return node_service_pb2.Empty() diff --git a/build/lib/exo/networking/grpc/node_service_pb2.py b/build/lib/exo/networking/grpc/node_service_pb2.py deleted file mode 100644 index cae2d0809..000000000 --- a/build/lib/exo/networking/grpc/node_service_pb2.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
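Both the peer handle and the server above move numpy arrays over the wire as a (tensor_data, shape, dtype) triple, serialising with ndarray.tobytes() and rebuilding with np.frombuffer(...).reshape(...). A minimal sketch of that round trip, with a plain dict standing in for the generated Tensor message (the dict and helper names here are illustrative, not the project's API):

import numpy as np

def to_wire(arr: np.ndarray) -> dict:
  # Stand-in for node_service_pb2.Tensor(tensor_data=..., shape=..., dtype=...)
  return {"tensor_data": arr.tobytes(), "shape": list(arr.shape), "dtype": str(arr.dtype)}

def from_wire(msg: dict) -> np.ndarray:
  # Mirrors np.frombuffer(response.tensor_data, dtype=...).reshape(response.shape) above
  return np.frombuffer(msg["tensor_data"], dtype=np.dtype(msg["dtype"])).reshape(msg["shape"])

original = np.arange(6, dtype=np.float32).reshape(2, 3)
assert np.array_equal(original, from_wire(to_wire(original)))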
-# source: node_service.proto -# Protobuf Python Version: 5.26.1 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x12node_service.proto\x12\x0cnode_service\"S\n\x05Shard\x12\x10\n\x08model_id\x18\x01 \x01(\t\x12\x13\n\x0bstart_layer\x18\x02 \x01(\x05\x12\x11\n\tend_layer\x18\x03 \x01(\x05\x12\x10\n\x08n_layers\x18\x04 \x01(\x05\"\xc3\x01\n\rPromptRequest\x12\"\n\x05shard\x18\x01 \x01(\x0b\x32\x13.node_service.Shard\x12\x0e\n\x06prompt\x18\x02 \x01(\t\x12\x16\n\timage_str\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x17\n\nrequest_id\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x1c\n\x0finference_state\x18\x05 \x01(\tH\x02\x88\x01\x01\x42\x0c\n\n_image_strB\r\n\x0b_request_idB\x12\n\x10_inference_state\"\xb3\x01\n\rTensorRequest\x12\"\n\x05shard\x18\x01 \x01(\x0b\x32\x13.node_service.Shard\x12$\n\x06tensor\x18\x02 \x01(\x0b\x32\x14.node_service.Tensor\x12\x17\n\nrequest_id\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x1c\n\x0finference_state\x18\x04 \x01(\tH\x01\x88\x01\x01\x42\r\n\x0b_request_idB\x12\n\x10_inference_state\"/\n\x19GetInferenceResultRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\"\\\n\x0fInferenceResult\x12)\n\x06tensor\x18\x01 \x01(\x0b\x32\x14.node_service.TensorH\x00\x88\x01\x01\x12\x13\n\x0bis_finished\x18\x02 \x01(\x08\x42\t\n\x07_tensor\";\n\x06Tensor\x12\x13\n\x0btensor_data\x18\x01 \x01(\x0c\x12\r\n\x05shape\x18\x02 \x03(\x05\x12\r\n\x05\x64type\x18\x03 \x01(\t\"<\n\x16\x43ollectTopologyRequest\x12\x0f\n\x07visited\x18\x01 \x03(\t\x12\x11\n\tmax_depth\x18\x02 \x01(\x05\"\x8e\x02\n\x08Topology\x12\x30\n\x05nodes\x18\x01 \x03(\x0b\x32!.node_service.Topology.NodesEntry\x12\x39\n\npeer_graph\x18\x02 \x03(\x0b\x32%.node_service.Topology.PeerGraphEntry\x1aN\n\nNodesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12/\n\x05value\x18\x02 \x01(\x0b\x32 .node_service.DeviceCapabilities:\x02\x38\x01\x1a\x45\n\x0ePeerGraphEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\"\n\x05value\x18\x02 \x01(\x0b\x32\x13.node_service.Peers:\x02\x38\x01\"\x19\n\x05Peers\x12\x10\n\x08peer_ids\x18\x01 \x03(\t\"7\n\x0b\x44\x65viceFlops\x12\x0c\n\x04\x66p32\x18\x01 \x01(\x02\x12\x0c\n\x04\x66p16\x18\x02 \x01(\x02\x12\x0c\n\x04int8\x18\x03 \x01(\x02\"k\n\x12\x44\x65viceCapabilities\x12\r\n\x05model\x18\x01 \x01(\t\x12\x0c\n\x04\x63hip\x18\x02 \x01(\t\x12\x0e\n\x06memory\x18\x03 \x01(\x05\x12(\n\x05\x66lops\x18\x04 \x01(\x0b\x32\x19.node_service.DeviceFlops\"L\n\x11SendResultRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x0e\n\x06result\x18\x02 \x03(\x05\x12\x13\n\x0bis_finished\x18\x03 \x01(\x08\"=\n\x17SendOpaqueStatusRequest\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 
\x01(\t\"\x07\n\x05\x45mpty2\xde\x03\n\x0bNodeService\x12\x41\n\nSendPrompt\x12\x1b.node_service.PromptRequest\x1a\x14.node_service.Tensor\"\x00\x12\x41\n\nSendTensor\x12\x1b.node_service.TensorRequest\x1a\x14.node_service.Tensor\"\x00\x12^\n\x12GetInferenceResult\x12\'.node_service.GetInferenceResultRequest\x1a\x1d.node_service.InferenceResult\"\x00\x12Q\n\x0f\x43ollectTopology\x12$.node_service.CollectTopologyRequest\x1a\x16.node_service.Topology\"\x00\x12\x44\n\nSendResult\x12\x1f.node_service.SendResultRequest\x1a\x13.node_service.Empty\"\x00\x12P\n\x10SendOpaqueStatus\x12%.node_service.SendOpaqueStatusRequest\x1a\x13.node_service.Empty\"\x00\x62\x06proto3' -) - -_globals = globals() -_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'node_service_pb2', _globals) -if not _descriptor._USE_C_DESCRIPTORS: - DESCRIPTOR._loaded_options = None - _globals['_TOPOLOGY_NODESENTRY']._loaded_options = None - _globals['_TOPOLOGY_NODESENTRY']._serialized_options = b'8\001' - _globals['_TOPOLOGY_PEERGRAPHENTRY']._loaded_options = None - _globals['_TOPOLOGY_PEERGRAPHENTRY']._serialized_options = b'8\001' - _globals['_SHARD']._serialized_start = 36 - _globals['_SHARD']._serialized_end = 119 - _globals['_PROMPTREQUEST']._serialized_start = 122 - _globals['_PROMPTREQUEST']._serialized_end = 317 - _globals['_TENSORREQUEST']._serialized_start = 320 - _globals['_TENSORREQUEST']._serialized_end = 499 - _globals['_GETINFERENCERESULTREQUEST']._serialized_start = 501 - _globals['_GETINFERENCERESULTREQUEST']._serialized_end = 548 - _globals['_INFERENCERESULT']._serialized_start = 550 - _globals['_INFERENCERESULT']._serialized_end = 642 - _globals['_TENSOR']._serialized_start = 644 - _globals['_TENSOR']._serialized_end = 703 - _globals['_COLLECTTOPOLOGYREQUEST']._serialized_start = 705 - _globals['_COLLECTTOPOLOGYREQUEST']._serialized_end = 765 - _globals['_TOPOLOGY']._serialized_start = 768 - _globals['_TOPOLOGY']._serialized_end = 1038 - _globals['_TOPOLOGY_NODESENTRY']._serialized_start = 889 - _globals['_TOPOLOGY_NODESENTRY']._serialized_end = 967 - _globals['_TOPOLOGY_PEERGRAPHENTRY']._serialized_start = 969 - _globals['_TOPOLOGY_PEERGRAPHENTRY']._serialized_end = 1038 - _globals['_PEERS']._serialized_start = 1040 - _globals['_PEERS']._serialized_end = 1065 - _globals['_DEVICEFLOPS']._serialized_start = 1067 - _globals['_DEVICEFLOPS']._serialized_end = 1122 - _globals['_DEVICECAPABILITIES']._serialized_start = 1124 - _globals['_DEVICECAPABILITIES']._serialized_end = 1231 - _globals['_SENDRESULTREQUEST']._serialized_start = 1233 - _globals['_SENDRESULTREQUEST']._serialized_end = 1309 - _globals['_SENDOPAQUESTATUSREQUEST']._serialized_start = 1311 - _globals['_SENDOPAQUESTATUSREQUEST']._serialized_end = 1372 - _globals['_EMPTY']._serialized_start = 1374 - _globals['_EMPTY']._serialized_end = 1381 - _globals['_NODESERVICE']._serialized_start = 1384 - _globals['_NODESERVICE']._serialized_end = 1862 -# @@protoc_insertion_point(module_scope) diff --git a/build/lib/exo/networking/grpc/node_service_pb2_grpc.py b/build/lib/exo/networking/grpc/node_service_pb2_grpc.py deleted file mode 100644 index ea1d3c98f..000000000 --- a/build/lib/exo/networking/grpc/node_service_pb2_grpc.py +++ /dev/null @@ -1,272 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc -import warnings - -from . 
import node_service_pb2 as node__service__pb2 - -GRPC_GENERATED_VERSION = '1.64.1' -GRPC_VERSION = grpc.__version__ -EXPECTED_ERROR_RELEASE = '1.65.0' -SCHEDULED_RELEASE_DATE = 'June 25, 2024' -_version_not_supported = False - -try: - from grpc._utilities import first_version_is_lower - _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) -except ImportError: - _version_not_supported = True - -if _version_not_supported: - warnings.warn( - f'The grpc package installed is at version {GRPC_VERSION},' + f' but the generated code in node_service_pb2_grpc.py depends on' + f' grpcio>={GRPC_GENERATED_VERSION}.' + - f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' + - f' This warning will become an error in {EXPECTED_ERROR_RELEASE},' + f' scheduled for release on {SCHEDULED_RELEASE_DATE}.', RuntimeWarning - ) - - -class NodeServiceStub(object): - """Missing associated documentation comment in .proto file.""" - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.SendPrompt = channel.unary_unary( - '/node_service.NodeService/SendPrompt', - request_serializer=node__service__pb2.PromptRequest.SerializeToString, - response_deserializer=node__service__pb2.Tensor.FromString, - _registered_method=True - ) - self.SendTensor = channel.unary_unary( - '/node_service.NodeService/SendTensor', - request_serializer=node__service__pb2.TensorRequest.SerializeToString, - response_deserializer=node__service__pb2.Tensor.FromString, - _registered_method=True - ) - self.GetInferenceResult = channel.unary_unary( - '/node_service.NodeService/GetInferenceResult', - request_serializer=node__service__pb2.GetInferenceResultRequest.SerializeToString, - response_deserializer=node__service__pb2.InferenceResult.FromString, - _registered_method=True - ) - self.CollectTopology = channel.unary_unary( - '/node_service.NodeService/CollectTopology', - request_serializer=node__service__pb2.CollectTopologyRequest.SerializeToString, - response_deserializer=node__service__pb2.Topology.FromString, - _registered_method=True - ) - self.SendResult = channel.unary_unary( - '/node_service.NodeService/SendResult', - request_serializer=node__service__pb2.SendResultRequest.SerializeToString, - response_deserializer=node__service__pb2.Empty.FromString, - _registered_method=True - ) - self.SendOpaqueStatus = channel.unary_unary( - '/node_service.NodeService/SendOpaqueStatus', - request_serializer=node__service__pb2.SendOpaqueStatusRequest.SerializeToString, - response_deserializer=node__service__pb2.Empty.FromString, - _registered_method=True - ) - - -class NodeServiceServicer(object): - """Missing associated documentation comment in .proto file.""" - def SendPrompt(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def SendTensor(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def GetInferenceResult(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - 
raise NotImplementedError('Method not implemented!') - - def CollectTopology(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def SendResult(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - def SendOpaqueStatus(self, request, context): - """Missing associated documentation comment in .proto file.""" - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') - - -def add_NodeServiceServicer_to_server(servicer, server): - rpc_method_handlers = { - 'SendPrompt': - grpc.unary_unary_rpc_method_handler( - servicer.SendPrompt, - request_deserializer=node__service__pb2.PromptRequest.FromString, - response_serializer=node__service__pb2.Tensor.SerializeToString, - ), - 'SendTensor': - grpc.unary_unary_rpc_method_handler( - servicer.SendTensor, - request_deserializer=node__service__pb2.TensorRequest.FromString, - response_serializer=node__service__pb2.Tensor.SerializeToString, - ), - 'GetInferenceResult': - grpc.unary_unary_rpc_method_handler( - servicer.GetInferenceResult, - request_deserializer=node__service__pb2.GetInferenceResultRequest.FromString, - response_serializer=node__service__pb2.InferenceResult.SerializeToString, - ), - 'CollectTopology': - grpc.unary_unary_rpc_method_handler( - servicer.CollectTopology, - request_deserializer=node__service__pb2.CollectTopologyRequest.FromString, - response_serializer=node__service__pb2.Topology.SerializeToString, - ), - 'SendResult': - grpc.unary_unary_rpc_method_handler( - servicer.SendResult, - request_deserializer=node__service__pb2.SendResultRequest.FromString, - response_serializer=node__service__pb2.Empty.SerializeToString, - ), - 'SendOpaqueStatus': - grpc.unary_unary_rpc_method_handler( - servicer.SendOpaqueStatus, - request_deserializer=node__service__pb2.SendOpaqueStatusRequest.FromString, - response_serializer=node__service__pb2.Empty.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler('node_service.NodeService', rpc_method_handlers) - server.add_generic_rpc_handlers((generic_handler,)) - server.add_registered_method_handlers('node_service.NodeService', rpc_method_handlers) - - -# This class is part of an EXPERIMENTAL API. 
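The generated NodeServiceStub above is what GRPCPeerHandle wraps: open a grpc.aio channel, build a PromptRequest around a Shard, and await the RPC. A rough client-side sketch, assuming the generated modules are importable as exo.networking.grpc (the address, model id, and prompt are placeholders, and a NodeService server must already be listening):

import asyncio
import grpc
from exo.networking.grpc import node_service_pb2, node_service_pb2_grpc  # assumed import path

async def send_prompt(address: str = "localhost:50051") -> None:
  # Placeholder address; nothing is served here unless a node is running.
  async with grpc.aio.insecure_channel(address) as channel:
    stub = node_service_pb2_grpc.NodeServiceStub(channel)
    request = node_service_pb2.PromptRequest(
      shard=node_service_pb2.Shard(model_id="some-model", start_layer=0, end_layer=0, n_layers=32),
      prompt="hello",
    )
    response = await stub.SendPrompt(request)  # returns a node_service.Tensor message
    print(response.shape, response.dtype)

# asyncio.run(send_prompt())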
-class NodeService(object): - """Missing associated documentation comment in .proto file.""" - @staticmethod - def SendPrompt(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/node_service.NodeService/SendPrompt', - node__service__pb2.PromptRequest.SerializeToString, - node__service__pb2.Tensor.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True - ) - - @staticmethod - def SendTensor(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/node_service.NodeService/SendTensor', - node__service__pb2.TensorRequest.SerializeToString, - node__service__pb2.Tensor.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True - ) - - @staticmethod - def GetInferenceResult(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/node_service.NodeService/GetInferenceResult', - node__service__pb2.GetInferenceResultRequest.SerializeToString, - node__service__pb2.InferenceResult.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True - ) - - @staticmethod - def CollectTopology(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/node_service.NodeService/CollectTopology', - node__service__pb2.CollectTopologyRequest.SerializeToString, - node__service__pb2.Topology.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True - ) - - @staticmethod - def SendResult(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/node_service.NodeService/SendResult', - node__service__pb2.SendResultRequest.SerializeToString, - node__service__pb2.Empty.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True - ) - - @staticmethod - def SendOpaqueStatus(request, target, options=(), channel_credentials=None, call_credentials=None, insecure=False, compression=None, wait_for_ready=None, timeout=None, metadata=None): - return grpc.experimental.unary_unary( - request, - target, - '/node_service.NodeService/SendOpaqueStatus', - node__service__pb2.SendOpaqueStatusRequest.SerializeToString, - node__service__pb2.Empty.FromString, - options, - channel_credentials, - insecure, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - _registered_method=True - ) diff --git a/build/lib/exo/networking/grpc/test_grpc_discovery.py 
b/build/lib/exo/networking/grpc/test_grpc_discovery.py deleted file mode 100644 index 13372bbb4..000000000 --- a/build/lib/exo/networking/grpc/test_grpc_discovery.py +++ /dev/null @@ -1,22 +0,0 @@ -import asyncio -import unittest -from .grpc_discovery import GRPCDiscovery - - -class TestGRPCDiscovery(unittest.IsolatedAsyncioTestCase): - async def asyncSetUp(self): - self.node1 = GRPCDiscovery("node1", 50051, 5678, 5679) - self.node2 = GRPCDiscovery("node2", 50052, 5679, 5678) - await self.node1.start() - await self.node2.start() - - async def asyncTearDown(self): - await self.node1.stop() - await self.node2.stop() - - async def test_discovery(self): - await asyncio.sleep(4) - - # Check discovered peers - print("Node1 Peers:", ", ".join([f"{peer_id}: {peer}" for peer_id, peer in self.node1.known_peers.items()])) - print("Node2 Peers:", ", ".join([f"{peer_id}: {peer}" for peer_id, peer in self.node2.known_peers.items()])) diff --git a/build/lib/exo/networking/peer_handle.py b/build/lib/exo/networking/peer_handle.py deleted file mode 100644 index cf232d006..000000000 --- a/build/lib/exo/networking/peer_handle.py +++ /dev/null @@ -1,48 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Optional, Tuple, List -import numpy as np -from exo.inference.shard import Shard -from exo.topology.device_capabilities import DeviceCapabilities -from exo.topology.topology import Topology - - -class PeerHandle(ABC): - @abstractmethod - def id(self) -> str: - pass - - @abstractmethod - def device_capabilities(self) -> DeviceCapabilities: - pass - - @abstractmethod - async def connect(self) -> None: - pass - - @abstractmethod - async def is_connected(self) -> bool: - pass - - @abstractmethod - async def disconnect(self) -> None: - pass - - @abstractmethod - async def send_prompt(self, shard: Shard, prompt: str, image_str: Optional[str] = None, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.array]: - pass - - @abstractmethod - async def send_tensor(self, shard: Shard, tensor: np.array, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.array]: - pass - - @abstractmethod - async def get_inference_result(self, request_id: str) -> Tuple[Optional[np.ndarray], bool]: - pass - - @abstractmethod - async def collect_topology(self, visited: set[str], max_depth: int) -> Topology: - pass - - @abstractmethod - async def send_result(self, request_id: str, result: List[int], is_finished: bool) -> None: - pass diff --git a/build/lib/exo/networking/server.py b/build/lib/exo/networking/server.py deleted file mode 100644 index 8e7f9812f..000000000 --- a/build/lib/exo/networking/server.py +++ /dev/null @@ -1,11 +0,0 @@ -from abc import ABC, abstractmethod - - -class Server(ABC): - @abstractmethod - async def start(self) -> None: - pass - - @abstractmethod - async def stop(self) -> None: - pass diff --git a/build/lib/exo/orchestration/__init__.py b/build/lib/exo/orchestration/__init__.py deleted file mode 100644 index 478af5370..000000000 --- a/build/lib/exo/orchestration/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .node import Node -from .standard_node import StandardNode - -__all__ = ["Node", "StandardNode"] diff --git a/build/lib/exo/orchestration/node.py b/build/lib/exo/orchestration/node.py deleted file mode 100644 index 60b729748..000000000 --- a/build/lib/exo/orchestration/node.py +++ /dev/null @@ -1,47 +0,0 @@ -from typing import Optional, Tuple, List -import numpy as np -from abc import ABC, abstractmethod -from exo.helpers 
import AsyncCallbackSystem -from exo.inference.shard import Shard -from exo.topology.topology import Topology - - -class Node(ABC): - @abstractmethod - async def start(self, wait_for_peers: int = 0) -> None: - pass - - @abstractmethod - async def stop(self) -> None: - pass - - @abstractmethod - async def process_prompt(self, shard: Shard, prompt: str, image_str: Optional[str] = None, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.ndarray]: - pass - - @abstractmethod - async def process_tensor(self, shard: Shard, tensor: np.ndarray, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.ndarray]: - pass - - @abstractmethod - async def get_inference_result(self, request_id: str) -> Tuple[Optional[np.ndarray], bool]: - pass - - @abstractmethod - async def collect_topology(self, visited: set[str] = set(), max_depth: int = 2) -> Topology: - pass - - @property - @abstractmethod - def current_topology(self) -> Topology: - pass - - @property - @abstractmethod - def on_token(self) -> AsyncCallbackSystem[str, Tuple[str, List[int], bool]]: - pass - - @property - @abstractmethod - def on_opaque_status(self) -> AsyncCallbackSystem[str, Tuple[str, str]]: - pass diff --git a/build/lib/exo/orchestration/standard_node.py b/build/lib/exo/orchestration/standard_node.py deleted file mode 100644 index b968b6597..000000000 --- a/build/lib/exo/orchestration/standard_node.py +++ /dev/null @@ -1,385 +0,0 @@ -import numpy as np -import json -import asyncio -import uuid -import time -import traceback -from typing import List, Dict, Optional, Tuple, Union -from exo.networking import Discovery, PeerHandle, Server -from exo.inference.inference_engine import InferenceEngine, Shard -from .node import Node -from exo.topology.topology import Topology -from exo.topology.device_capabilities import device_capabilities -from exo.topology.partitioning_strategy import Partition, PartitioningStrategy, map_partitions_to_shards -from exo import DEBUG -from exo.helpers import AsyncCallbackSystem -from exo.viz.topology_viz import TopologyViz -from exo.download.hf.hf_helpers import RepoProgressEvent - - -class StandardNode(Node): - def __init__( - self, - _id: str, - server: Server, - inference_engine: InferenceEngine, - discovery: Discovery, - partitioning_strategy: PartitioningStrategy = None, - max_generate_tokens: int = 1024, - chatgpt_api_endpoints: List[str] = [], - web_chat_urls: List[str] = [], - disable_tui: Optional[bool] = False, - topology_viz: Optional[TopologyViz] = None, - ): - self.id = _id - self.inference_engine = inference_engine - self.server = server - self.discovery = discovery - self.partitioning_strategy = partitioning_strategy - self.peers: List[PeerHandle] = {} - self.topology: Topology = Topology() - self.device_capabilities = device_capabilities() - self.buffered_token_output: Dict[str, Tuple[List[int], bool]] = {} - self.max_generate_tokens = max_generate_tokens - self.topology_viz = topology_viz - self._on_token = AsyncCallbackSystem[str, Tuple[str, List[int], bool]]() - self._on_opaque_status = AsyncCallbackSystem[str, Tuple[str, str]]() - self._on_opaque_status.register("node_status").on_next(self.on_node_status) - self.node_download_progress: Dict[str, RepoProgressEvent] = {} - - async def start(self, wait_for_peers: int = 0) -> None: - await self.server.start() - await self.discovery.start() - await self.update_peers(wait_for_peers) - await self.collect_topology() - if DEBUG >= 2: print(f"Collected topology: {self.topology}") - 
asyncio.create_task(self.periodic_topology_collection(5)) - - async def stop(self) -> None: - await self.discovery.stop() - await self.server.stop() - - def on_node_status(self, request_id, opaque_status): - try: - status_data = json.loads(opaque_status) - if status_data.get("type", "") == "node_status": - if status_data.get("status", "").startswith("start_"): - self.current_topology.active_node_id = status_data.get("node_id") - elif status_data.get("status", "").startswith("end_"): - if status_data.get("node_id") == self.current_topology.active_node_id: - self.current_topology.active_node_id = None - download_progress = None - if status_data.get("type", "") == "download_progress": - if DEBUG >= 5: print(f"Download progress from {status_data.get('node_id')}: {status_data.get('progress')}") - download_progress = RepoProgressEvent.from_dict(status_data.get('progress')) - self.node_download_progress[status_data.get('node_id')] = download_progress - if self.topology_viz: - self.topology_viz.update_visualization(self.current_topology, self.partitioning_strategy.partition(self.current_topology), self.id, self.node_download_progress) - except Exception as e: - if DEBUG >= 1: print(f"Error updating visualization: {e}") - if DEBUG >= 1: traceback.print_exc() - - async def process_prompt(self, base_shard: Shard, prompt: str, image_str: Optional[str] = None, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.ndarray]: - shard = self.get_current_shard(base_shard) - asyncio.create_task( - self.broadcast_opaque_status( - request_id, - json.dumps({ - "type": "node_status", - "node_id": self.id, - "status": "start_process_prompt", - "base_shard": base_shard.to_dict(), - "shard": shard.to_dict(), - "prompt": prompt, - "image_str": image_str, - "inference_state": inference_state, - "request_id": request_id, - }), - ) - ) - start_time = time.perf_counter_ns() - resp = await self._process_prompt(base_shard, prompt, image_str, request_id, inference_state) - end_time = time.perf_counter_ns() - elapsed_time_ns = end_time - start_time - asyncio.create_task( - self.broadcast_opaque_status( - request_id, - json.dumps({ - "type": "node_status", - "node_id": self.id, - "status": "end_process_prompt", - "base_shard": base_shard.to_dict(), - "shard": shard.to_dict(), - "prompt": prompt, - "image_str": image_str, - "inference_state": inference_state, - "request_id": request_id, - "elapsed_time_ns": elapsed_time_ns, - "result_size": resp.size if resp is not None else 0, - }), - ) - ) - return resp - - async def _process_prompt(self, base_shard: Shard, prompt: str, image_str: Optional[str] = None, request_id: Optional[str] = None, inference_state: Optional[str] = None) -> Optional[np.ndarray]: - if request_id is None: - request_id = str(uuid.uuid4()) - if request_id not in self.buffered_token_output: - self.buffered_token_output[request_id] = ([], False) - shard = self.get_current_shard(base_shard) - - if DEBUG >= 2: print(f"[{request_id}] process prompt: {base_shard=} {shard=} {prompt=} {image_str=}") - if shard.start_layer != 0: - if DEBUG >= 2: print(f"[{request_id}] forwarding to next shard: {base_shard=} {shard=} {prompt=} {image_str=}") - await self.forward_to_next_shard(shard, prompt, request_id, image_str=image_str, inference_state=inference_state) - return - - result, inference_state, is_finished = await self.inference_engine.infer_prompt(request_id, shard, prompt, image_str, inference_state=inference_state) - is_finished = is_finished or 
len(self.buffered_token_output[request_id][0]) >= self.max_generate_tokens - if is_finished: - self.buffered_token_output[request_id] = (self.buffered_token_output[request_id][0], True) - asyncio.create_task(self.broadcast_result(request_id, self.buffered_token_output[request_id][0], is_finished)) # TODO: this is n^2 communication complexity - - if result.size == 1: - self.buffered_token_output[request_id][0].append(result.item()) - self.trigger_on_token_callbacks(request_id, self.buffered_token_output[request_id][0], is_finished) - - if DEBUG >= 2: print(f"[{request_id}] result size: {result.size}, is finished: {is_finished}, buffered tokens: {len(self.buffered_token_output[request_id][0])}") - - if not is_finished: - asyncio.create_task(self.forward_to_next_shard(shard, result, request_id, image_str=image_str, inference_state=inference_state)) - - return np.array(self.buffered_token_output[request_id][0]) if len(self.buffered_token_output[request_id][0]) > 0 else None - - async def process_tensor( - self, - base_shard: Shard, - tensor: np.ndarray, - request_id: Optional[str] = None, - inference_state: Optional[str] = None, - ) -> Optional[np.ndarray]: - shard = self.get_current_shard(base_shard) - asyncio.create_task( - self.broadcast_opaque_status( - request_id, - json.dumps({ - "type": "node_status", - "node_id": self.id, - "status": "start_process_tensor", - "base_shard": base_shard.to_dict(), - "shard": shard.to_dict(), - "tensor_size": tensor.size, - "tensor_shape": tensor.shape, - "request_id": request_id, - "inference_state": inference_state, - }), - ) - ) - start_time = time.perf_counter_ns() - resp = await self._process_tensor(shard, tensor, request_id, inference_state) - end_time = time.perf_counter_ns() - elapsed_time_ns = end_time - start_time - asyncio.create_task( - self.broadcast_opaque_status( - request_id, - json.dumps({ - "type": "node_status", - "node_id": self.id, - "status": "end_process_tensor", - "base_shard": base_shard.to_dict(), - "shard": shard.to_dict(), - "request_id": request_id, - "elapsed_time_ns": elapsed_time_ns, - "result_size": resp.size if resp is not None else 0, - }), - ) - ) - return resp - - async def _process_tensor( - self, - base_shard: Shard, - tensor: np.ndarray, - request_id: Optional[str] = None, - inference_state: Optional[str] = None, - ) -> Optional[np.ndarray]: - if request_id is None: - request_id = str(uuid.uuid4()) - if request_id not in self.buffered_token_output: - self.buffered_token_output[request_id] = ([], False) - shard = self.get_current_shard(base_shard) - - try: - if DEBUG >= 1: print(f"[{request_id}] process_tensor: {tensor.size=} {tensor.shape=}") - result, inference_state, is_finished = await self.inference_engine.infer_tensor(request_id, shard, tensor, inference_state=inference_state) - is_finished = is_finished or len(self.buffered_token_output[request_id][0]) >= self.max_generate_tokens - if is_finished: - self.buffered_token_output[request_id] = (self.buffered_token_output[request_id][0], True) - asyncio.create_task(self.broadcast_result(request_id, self.buffered_token_output[request_id][0], is_finished)) # TODO: this is n^2 communication complexity - - if result.size == 1: # we got a new token out - self.buffered_token_output[request_id][0].append(result.item()) - self.trigger_on_token_callbacks(request_id, self.buffered_token_output[request_id][0], is_finished) - if DEBUG >= 2: print(f"[{request_id}] result size: {result.size}, is finished: {is_finished}, buffered tokens: 
{len(self.buffered_token_output[request_id][0])}") - - if not is_finished: - asyncio.create_task(self.forward_to_next_shard(shard, result, request_id, inference_state=inference_state)) - - return np.array(self.buffered_token_output[request_id][0]) if len(self.buffered_token_output[request_id][0]) > 0 else None - except Exception as e: - print(f"Error processing tensor for shard {shard}: {e}") - traceback.print_exc() - return None - - async def forward_to_next_shard( - self, - base_shard: Shard, - tensor_or_prompt: Union[np.ndarray, str], - request_id: str, - image_str: Optional[str] = None, - inference_state: Optional[str] = None, - ) -> None: - if not self.partitioning_strategy: - if DEBUG >= 1: print("No partitioning strategy found. Skipping forward.") - return - shard = self.get_current_shard(base_shard) - - partitions = self.partitioning_strategy.partition(self.topology) - shards = map_partitions_to_shards(self.partitioning_strategy.partition(self.topology), base_shard.n_layers, base_shard.model_id) - current_partition_index = next((i for i, p in enumerate(partitions) if p.node_id == self.id), None) - if DEBUG >= 1: print(f"Current partition index: {current_partition_index}") - if current_partition_index is not None: - next_partition_index = (current_partition_index+1) % len(partitions) - next_partition: Partition = partitions[next_partition_index] - next_shard = shards[next_partition_index] - if DEBUG >= 2: print(f"Computed next from: {shard}, {self.topology}. Next partition: {next_partition}") - - if next_partition.node_id == self.id: - if isinstance(tensor_or_prompt, np.ndarray): - await self.process_tensor(shard, tensor_or_prompt, request_id, inference_state=inference_state) - else: - await self.process_prompt(shard, tensor_or_prompt, image_str, request_id, inference_state=inference_state) - return - - target_peer = next((p for p in self.peers if p.id() == next_partition.node_id), None) - if not target_peer: - raise ValueError(f"Peer for {next_partition} not found") - - if DEBUG >= 1: print(f"Sending tensor_or_prompt to {target_peer.id()}: {tensor_or_prompt}") - - if isinstance(tensor_or_prompt, np.ndarray): - await target_peer.send_tensor(next_shard, tensor_or_prompt, request_id=request_id, inference_state=inference_state) - else: - await target_peer.send_prompt(next_shard, tensor_or_prompt, image_str=image_str, request_id=request_id, inference_state=inference_state) - - def get_current_shard(self, base_shard: Shard) -> Shard: - partitions = self.partitioning_strategy.partition(self.topology) - shards = map_partitions_to_shards(partitions, base_shard.n_layers, base_shard.model_id) - current_partition_index = next((i for i, p in enumerate(partitions) if p.node_id == self.id), None) - if current_partition_index is None: - raise ValueError(f"No current partition found for node: {self.id}") - return shards[current_partition_index] - - async def update_peers(self, wait_for_peers: int = 0) -> None: - self.peers = await self.discovery.discover_peers(wait_for_peers) - for peer in self.peers: - is_connected = await peer.is_connected() - if DEBUG >= 2 and is_connected: - print(f"Already connected to {peer.id()}: {is_connected}") - if not is_connected: - if DEBUG >= 2: print(f"Connecting to {peer.id()}...") - await peer.connect() - if DEBUG >= 1: print(f"Connected to peer {peer.device_capabilities()} ({peer.id()=})") - - async def periodic_topology_collection(self, interval: int): - while True: - await asyncio.sleep(interval) - try: - await self.update_peers() - await self.collect_topology() 
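forward_to_next_shard above is a simple ring hand-off: partition the topology, find this node's partition index, and pass the result to the node at (index + 1) % len(partitions), wrapping back to the first partition at the end. The index arithmetic in isolation (Partition here is a stand-in namedtuple, not the project's class):

from collections import namedtuple

Partition = namedtuple("Partition", ["node_id"])  # illustrative stand-in

def next_node(partitions: list, own_id: str) -> str:
  current = next(i for i, p in enumerate(partitions) if p.node_id == own_id)
  return partitions[(current + 1) % len(partitions)].node_id

ring = [Partition("a"), Partition("b"), Partition("c")]
assert next_node(ring, "a") == "b"
assert next_node(ring, "c") == "a"  # the ring wraps back to the first node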
- except Exception as e: - print(f"Error collecting topology: {e}") - traceback.print_exc() - - async def get_inference_result(self, request_id: str) -> Tuple[Optional[np.ndarray], bool]: - if request_id not in self.buffered_token_output: - return None, False - return np.array(self.buffered_token_output[request_id][0]), self.buffered_token_output[request_id][1] - - async def collect_topology(self, visited: set[str] = set(), max_depth: int = 4) -> Topology: - next_topology = Topology() - next_topology.update_node(self.id, self.device_capabilities) - - if DEBUG >= 2: print(f"Collecting topology {max_depth=} {visited=}") - - prev_visited = visited.copy() - # TODO: should we add our own peer id here? - visited.update(p.id() for p in self.peers) - - for peer in self.peers: - next_topology.update_node(peer.id(), peer.device_capabilities()) - next_topology.add_edge(self.id, peer.id()) - - if peer.id() in prev_visited: - continue - - if max_depth <= 0: - if DEBUG >= 2: print("Max depth reached. Skipping...") - continue - - try: - other_topology = await peer.collect_topology(visited, max_depth=max_depth - 1) - if DEBUG >= 2: print(f"Collected topology from: {peer.id()}: {other_topology}") - self.topology.merge(other_topology) - except Exception as e: - print(f"Error collecting topology from {peer.id()}: {e}") - - next_topology.active_node_id = self.topology.active_node_id # this is not so clean. - self.topology = next_topology - if self.topology_viz: - self.topology_viz.update_visualization(self.current_topology, self.partitioning_strategy.partition(self.current_topology), self.id) - return next_topology - - @property - def on_token(self) -> AsyncCallbackSystem[str, Tuple[str, List[int], bool]]: - return self._on_token - - @property - def on_opaque_status(self) -> AsyncCallbackSystem[str, Tuple[str, str]]: - return self._on_opaque_status - - def trigger_on_token_callbacks(self, request_id: str, tokens: List[int], is_finished: bool) -> None: - if DEBUG >= 2: print(f"Triggering all on_token callbacks with {request_id=} num_tokens={len(tokens)} {is_finished=}") - self.on_token.trigger_all(request_id, tokens, is_finished) - - async def broadcast_result(self, request_id: str, result: List[int], is_finished: bool) -> None: - async def send_result_to_peer(peer): - try: - await asyncio.wait_for(peer.send_result(request_id, result, is_finished), timeout=15.0) - except asyncio.TimeoutError: - print(f"Timeout broadcasting result to {peer.id()}") - except Exception as e: - print(f"Error broadcasting result to {peer.id()}: {e}") - traceback.print_exc() - - await asyncio.gather(*[send_result_to_peer(peer) for peer in self.peers], return_exceptions=True) - - async def broadcast_opaque_status(self, request_id: str, status: str) -> None: - if DEBUG >= 5: print(f"Broadcasting opaque status: {request_id=} {status=}") - - async def send_status_to_peer(peer): - try: - await asyncio.wait_for(peer.send_opaque_status(request_id, status), timeout=15.0) - except asyncio.TimeoutError: - print(f"Timeout sending opaque status to {peer.id()}") - except Exception as e: - print(f"Error sending opaque status to {peer.id()}: {e}") - traceback.print_exc() - - await asyncio.gather(*[send_status_to_peer(peer) for peer in self.peers], return_exceptions=True) - # in the case of opaque status, we also want to receive our own opaque statuses - self.on_opaque_status.trigger_all(request_id, status) - - @property - def current_topology(self) -> Topology: - return self.topology diff --git a/build/lib/exo/orchestration/test_node.py 
b/build/lib/exo/orchestration/test_node.py deleted file mode 100644 index 230ef0cf6..000000000 --- a/build/lib/exo/orchestration/test_node.py +++ /dev/null @@ -1,57 +0,0 @@ -import unittest -from unittest.mock import Mock, AsyncMock -import numpy as np - -from .standard_node import StandardNode -from exo.networking.peer_handle import PeerHandle - - -class TestNode(unittest.IsolatedAsyncioTestCase): - def setUp(self): - self.mock_inference_engine = AsyncMock() - self.mock_server = AsyncMock() - self.mock_server.start = AsyncMock() - self.mock_server.stop = AsyncMock() - self.mock_discovery = AsyncMock() - self.mock_discovery.start = AsyncMock() - self.mock_discovery.stop = AsyncMock() - mock_peer1 = Mock(spec=PeerHandle) - mock_peer1.id.return_value = "peer1" - mock_peer2 = Mock(spec=PeerHandle) - mock_peer2.id.return_value = "peer2" - self.mock_discovery.discover_peers = AsyncMock(return_value=[mock_peer1, mock_peer2]) - - self.node = StandardNode("test_node", self.mock_server, self.mock_inference_engine, "localhost", 50051, self.mock_discovery) - - async def asyncSetUp(self): - await self.node.start() - - async def asyncTearDown(self): - await self.node.stop() - - async def test_node_initialization(self): - self.assertEqual(self.node.node_id, "test_node") - self.assertEqual(self.node.host, "localhost") - self.assertEqual(self.node.port, 50051) - - async def test_node_start(self): - self.mock_server.start.assert_called_once_with("localhost", 50051) - - async def test_node_stop(self): - await self.node.stop() - self.mock_server.stop.assert_called_once() - - async def test_discover_and_connect_to_peers(self): - await self.node.discover_and_connect_to_peers() - self.assertEqual(len(self.node.peers), 2) - self.assertIn("peer1", map(lambda p: p.id(), self.node.peers)) - self.assertIn("peer2", map(lambda p: p.id(), self.node.peers)) - - async def test_process_tensor_calls_inference_engine(self): - mock_peer = Mock() - self.node.peers = [mock_peer] - - input_tensor = np.array([69, 1, 2]) - await self.node.process_tensor(input_tensor, None) - - self.node.inference_engine.process_shard.assert_called_once_with(input_tensor) diff --git a/build/lib/exo/stats/__init__.py b/build/lib/exo/stats/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/build/lib/exo/stats/metrics.py b/build/lib/exo/stats/metrics.py deleted file mode 100644 index f29533ff7..000000000 --- a/build/lib/exo/stats/metrics.py +++ /dev/null @@ -1,29 +0,0 @@ -from exo.orchestration import Node -from prometheus_client import start_http_server, Counter, Histogram -import json - -# Create metrics to track time spent and requests made. 
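The deleted test_node.py above shows the project's pattern for exercising async orchestration code: unittest.IsolatedAsyncioTestCase together with AsyncMock servers, discovery, and mocked peers. A stripped-down sketch of the same pattern (FakeNode is a hypothetical class kept minimal so the example stands alone):

import unittest
from unittest.mock import AsyncMock, Mock

class FakeNode:
  # Hypothetical node that only knows how to refresh its peer list.
  def __init__(self, discovery):
    self.discovery = discovery
    self.peers = []

  async def update_peers(self):
    self.peers = await self.discovery.discover_peers()

class TestFakeNode(unittest.IsolatedAsyncioTestCase):
  async def test_update_peers(self):
    peer = Mock()
    peer.id.return_value = "peer1"
    discovery = AsyncMock()
    discovery.discover_peers = AsyncMock(return_value=[peer])

    node = FakeNode(discovery)
    await node.update_peers()
    self.assertEqual([p.id() for p in node.peers], ["peer1"])

# Run with: python -m unittest <module>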
-PROCESS_PROMPT_COUNTER = Counter("process_prompt_total", "Total number of prompts processed", ["node_id"]) -PROCESS_TENSOR_COUNTER = Counter("process_tensor_total", "Total number of tensors processed", ["node_id"]) -PROCESS_TENSOR_TIME = Histogram("process_tensor_seconds", "Time spent processing tensor", ["node_id"]) - - -def start_metrics_server(node: Node, port: int): - start_http_server(port) - - def _on_opaque_status(request_id, opaque_status: str): - status_data = json.loads(opaque_status) - _type = status_data.get("type", "") - node_id = status_data.get("node_id", "") - if _type != "node_status": - return - status = status_data.get("status", "") - - if status == "end_process_prompt": - PROCESS_PROMPT_COUNTER.labels(node_id=node_id).inc() - elif status == "end_process_tensor": - elapsed_time_ns = status_data.get("elapsed_time_ns", 0) - PROCESS_TENSOR_COUNTER.labels(node_id=node_id).inc() - PROCESS_TENSOR_TIME.labels(node_id=node_id).observe(elapsed_time_ns/1e9) # Convert ns to seconds - - node.on_opaque_status.register("stats").on_next(_on_opaque_status) diff --git a/build/lib/exo/test_callbacks.py b/build/lib/exo/test_callbacks.py deleted file mode 100644 index c10083d6e..000000000 --- a/build/lib/exo/test_callbacks.py +++ /dev/null @@ -1,50 +0,0 @@ -import asyncio -from typing import Any, Callable -from exo.helpers import AsyncCallbackSystem, AsyncCallback - - -# Usage example -async def main() -> None: - callback_system = AsyncCallbackSystem[str, Any]() - - # Register callbacks - callback1 = callback_system.register("callback1") - callback2 = callback_system.register("callback2") - - def on_next_callback(name: str) -> Callable[..., None]: - def callback(*args: Any) -> None: - print(f"{name} received values: {args}") - - return callback - - callback1.on_next(on_next_callback("Callback1")) - callback2.on_next(on_next_callback("Callback2")) - - async def wait_for_callback(name: str, callback: AsyncCallback[Any], condition: Callable[..., bool]) -> None: - try: - result = await callback.wait(condition, timeout=2) - print(f"{name} wait completed with result: {result}") - except asyncio.TimeoutError: - print(f"{name} wait timed out") - - # Trigger all callbacks at once - callback_system.trigger_all("Hello", 42, True) - - # Wait for all callbacks with different conditions - await asyncio.gather( - wait_for_callback("Callback1", callback1, lambda msg, num, flag: isinstance(msg, str) and num > 0), - wait_for_callback("Callback2", callback2, lambda msg, num, flag: flag is True), - ) - - # Trigger individual callback - callback_system.trigger("callback2", "World", -10, False) - - # Demonstrate timeout - new_callback = callback_system.register("new_callback") - new_callback.on_next(on_next_callback("NewCallback")) - await wait_for_callback("NewCallback", new_callback, lambda msg, num, flag: num > 100) - - callback_system.trigger("callback2", "World", 200, False) - - -asyncio.run(main()) diff --git a/build/lib/exo/topology/__init__.py b/build/lib/exo/topology/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/build/lib/exo/topology/device_capabilities.py b/build/lib/exo/topology/device_capabilities.py deleted file mode 100644 index 51db53ef2..000000000 --- a/build/lib/exo/topology/device_capabilities.py +++ /dev/null @@ -1,207 +0,0 @@ -from exo import DEBUG -from dataclasses import dataclass, asdict -import subprocess -import psutil - -TFLOPS = 1.00 - - -@dataclass -class DeviceFlops: - # units of TFLOPS - fp32: float - fp16: float - int8: float - - def __str__(self): 
- return f"fp32: {self.fp32 / TFLOPS:.2f} TFLOPS, fp16: {self.fp16 / TFLOPS:.2f} TFLOPS, int8: {self.int8 / TFLOPS:.2f} TFLOPS" - - def to_dict(self): - return asdict(self) - - -@dataclass -class DeviceCapabilities: - model: str - chip: str - memory: int - flops: DeviceFlops - - def __str__(self): - return f"Model: {self.model}. Chip: {self.chip}. Memory: {self.memory}MB. Flops: {self.flops}" - - def __post_init__(self): - if isinstance(self.flops, dict): - self.flops = DeviceFlops(**self.flops) - - def to_dict(self): - return {"model": self.model, "chip": self.chip, "memory": self.memory, "flops": self.flops.to_dict()} - - -UNKNOWN_DEVICE_CAPABILITIES = DeviceCapabilities(model="Unknown Model", chip="Unknown Chip", memory=0, flops=DeviceFlops(fp32=0, fp16=0, int8=0)) - -CHIP_FLOPS = { - # Source: https://www.cpu-monkey.com - # Note: currently no distinction between variants of M3 Max and M3 Pro, we pick the lower one to be conservative - ### M chips - "Apple M1": DeviceFlops(fp32=2.29*TFLOPS, fp16=4.58*TFLOPS, int8=9.16*TFLOPS), - "Apple M1 Pro": DeviceFlops(fp32=5.30*TFLOPS, fp16=10.60*TFLOPS, int8=21.20*TFLOPS), - "Apple M1 Max": DeviceFlops(fp32=10.60*TFLOPS, fp16=21.20*TFLOPS, int8=42.40*TFLOPS), - "Apple M1 Ultra": DeviceFlops(fp32=21.20*TFLOPS, fp16=42.40*TFLOPS, int8=84.80*TFLOPS), - "Apple M2": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS), - "Apple M2 Pro": DeviceFlops(fp32=5.68*TFLOPS, fp16=11.36*TFLOPS, int8=22.72*TFLOPS), - "Apple M2 Max": DeviceFlops(fp32=13.49*TFLOPS, fp16=26.98*TFLOPS, int8=53.96*TFLOPS), - "Apple M2 Ultra": DeviceFlops(fp32=26.98*TFLOPS, fp16=53.96*TFLOPS, int8=107.92*TFLOPS), - "Apple M3": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS), - "Apple M3 Max": DeviceFlops(fp32=14.20*TFLOPS, fp16=28.40*TFLOPS, int8=56.80*TFLOPS), - "Apple M3 Pro": DeviceFlops(fp32=4.97*TFLOPS, fp16=9.94*TFLOPS, int8=19.88*TFLOPS), - "Apple M4": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS), - ### A chips - "Apple A13 Bionic": DeviceFlops(fp32=0.69*TFLOPS, fp16=1.38*TFLOPS, int8=2.76*TFLOPS), - "Apple A14 Bionic": DeviceFlops(fp32=0.75*TFLOPS, fp16=1.50*TFLOPS, int8=3.00*TFLOPS), - "Apple A15 Bionic": DeviceFlops(fp32=1.37*TFLOPS, fp16=2.74*TFLOPS, int8=5.48*TFLOPS), - "Apple A16 Bionic": DeviceFlops(fp32=1.79*TFLOPS, fp16=3.58*TFLOPS, int8=7.16*TFLOPS), - "Apple A17 Pro": DeviceFlops(fp32=2.15*TFLOPS, fp16=4.30*TFLOPS, int8=8.60*TFLOPS), - ### NVIDIA GPUs - # RTX 40 series - "NVIDIA GEFORCE RTX 4090": DeviceFlops(fp32=82.58*TFLOPS, fp16=165.16*TFLOPS, int8=330.32*TFLOPS), - "NVIDIA GEFORCE RTX 4080": DeviceFlops(fp32=48.74*TFLOPS, fp16=97.48*TFLOPS, int8=194.96*TFLOPS), - "NVIDIA GEFORCE RTX 4080 SUPER": DeviceFlops(fp32=52.0*TFLOPS, fp16=104.0*TFLOPS, int8=208.0*TFLOPS), - "NVIDIA GEFORCE RTX 4070 TI SUPER": DeviceFlops(fp32=40.0*TFLOPS, fp16=80.0*TFLOPS, int8=160.0*TFLOPS), - "NVIDIA GEFORCE RTX 4070 TI": DeviceFlops(fp32=39.43*TFLOPS, fp16=78.86*TFLOPS, int8=157.72*TFLOPS), - "NVIDIA GEFORCE RTX 4070 SUPER": DeviceFlops(fp32=30.0*TFLOPS, fp16=60.0*TFLOPS, int8=120.0*TFLOPS), - "NVIDIA GEFORCE RTX 4070": DeviceFlops(fp32=29.0*TFLOPS, fp16=58.0*TFLOPS, int8=116.0*TFLOPS), - "NVIDIA GEFORCE RTX 4060 TI 16GB": DeviceFlops(fp32=22.0*TFLOPS, fp16=44.0*TFLOPS, int8=88.0*TFLOPS), - # RTX 30 series - "NVIDIA GEFORCE RTX 3050": DeviceFlops(fp32=9.11*TFLOPS, fp16=18.22*TFLOPS, int8=36.44*TFLOPS), - "NVIDIA GEFORCE RTX 3060": DeviceFlops(fp32=13.0*TFLOPS, fp16=26.0*TFLOPS, int8=52.0*TFLOPS), - "NVIDIA GEFORCE RTX 3060 TI": 
DeviceFlops(fp32=16.2*TFLOPS, fp16=32.4*TFLOPS, int8=64.8*TFLOPS), - "NVIDIA GEFORCE RTX 3070": DeviceFlops(fp32=20.3*TFLOPS, fp16=40.6*TFLOPS, int8=81.2*TFLOPS), - "NVIDIA GEFORCE RTX 3070 TI": DeviceFlops(fp32=21.8*TFLOPS, fp16=43.6*TFLOPS, int8=87.2*TFLOPS), - "NVIDIA GEFORCE RTX 3080 (10 GB)": DeviceFlops(fp32=29.8*TFLOPS, fp16=59.6*TFLOPS, int8=119.2*TFLOPS), - "NVIDIA GEFORCE RTX 3080 (12 GB)": DeviceFlops(fp32=30.6*TFLOPS, fp16=61.2*TFLOPS, int8=122.4*TFLOPS), - "NVIDIA GEFORCE RTX 3080 TI": DeviceFlops(fp32=34.1*TFLOPS, fp16=68.2*TFLOPS, int8=136.4*TFLOPS), - "NVIDIA GEFORCE RTX 3090": DeviceFlops(fp32=35.6*TFLOPS, fp16=71.2*TFLOPS, int8=142.4*TFLOPS), - "NVIDIA GEFORCE RTX 3090 TI": DeviceFlops(fp32=40.0*TFLOPS, fp16=80.0*TFLOPS, int8=160.0*TFLOPS), - # RTX 20 series - "NVIDIA GEFORCE RTX 2060": DeviceFlops(fp32=6.45*TFLOPS, fp16=12.9*TFLOPS, int8=25.8*TFLOPS), - "NVIDIA GEFORCE RTX 2060 SUPER": DeviceFlops(fp32=7.2*TFLOPS, fp16=14.4*TFLOPS, int8=28.8*TFLOPS), - "NVIDIA GEFORCE RTX 2070": DeviceFlops(fp32=7.46*TFLOPS, fp16=14.93*TFLOPS, int8=29.86*TFLOPS), - "NVIDIA GEFORCE RTX 2070 SUPER": DeviceFlops(fp32=9.06*TFLOPS, fp16=18.12*TFLOPS, int8=36.24*TFLOPS), - "NVIDIA GEFORCE RTX 2080": DeviceFlops(fp32=10.07*TFLOPS, fp16=20.14*TFLOPS, int8=40.28*TFLOPS), - "NVIDIA GEFORCE RTX 2080 SUPER": DeviceFlops(fp32=11.15*TFLOPS, fp16=22.30*TFLOPS, int8=44.60*TFLOPS), - "NVIDIA TITAN RTX": DeviceFlops(fp32=16.31*TFLOPS, fp16=32.62*TFLOPS, int8=65.24*TFLOPS), - # QUATRO RTX Ampere series - "NVIDIA QUATRO RTX A2000": DeviceFlops(fp32=7.99*TFLOPS, fp16=7.99*TFLOPS, int8=31.91*TFLOPS), - "NVIDIA QUATRO RTX A4000": DeviceFlops(fp32=19.17*TFLOPS, fp16=19.17*TFLOPS, int8=76.68*TFLOPS), - "NVIDIA QUATRO RTX A4500": DeviceFlops(fp32=23.65*TFLOPS, fp16=23.65*TFLOPS, int8=94.6*TFLOPS), - "NVIDIA QUATRO RTX A5000": DeviceFlops(fp32=27.8*TFLOPS, fp16=27.8*TFLOPS, int8=111.2*TFLOPS), - "NVIDIA QUATRO RTX A6000": DeviceFlops(fp32=38.71*TFLOPS, fp16=38.71*TFLOPS, int8=154.84*TFLOPS), - # Common Server GPUs - "NVIDIA A40 48GB PCIE": DeviceFlops(fp32=37.4*TFLOPS, fp16=149.7*TFLOPS, int8=299.3*TFLOPS), - "NVIDIA A100 40GB PCIE": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), - "NVIDIA A800 40GB PCIE": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), - "NVIDIA A100 80GB PCIE": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), - "NVIDIA A800 80GB PCIE": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), - "NVIDIA A100 80GB SXM": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), - "NVIDIA A800 80GB SXM": DeviceFlops(fp32=19.5*TFLOPS, fp16=312.0*TFLOPS, int8=624.0*TFLOPS), - "NVIDIA T1000 8GB": DeviceFlops(fp32=2.5 * TFLOPS, fp16=5.0 * TFLOPS, int8=10.0 * TFLOPS), - "Quadro M2000": DeviceFlops(fp32=0.5 * TFLOPS, fp16=1.0 * TFLOPS, int8=2.0 * TFLOPS), - "Quadro P400": DeviceFlops(fp32=0.641 * TFLOPS, fp16=1.282 * TFLOPS, int8=2.564 * TFLOPS), - # ... add more devices if needed ... 
- ### AMD GPUs - # RX 6000 series - "AMD Radeon RX 6900 XT": DeviceFlops(fp32=23.04*TFLOPS, fp16=46.08*TFLOPS, int8=92.16*TFLOPS), - "AMD Radeon RX 6800 XT": DeviceFlops(fp32=20.74*TFLOPS, fp16=41.48*TFLOPS, int8=82.96*TFLOPS), - "AMD Radeon RX 6800": DeviceFlops(fp32=16.17*TFLOPS, fp16=32.34*TFLOPS, int8=64.68*TFLOPS), - "AMD Radeon RX 6700 XT": DeviceFlops(fp32=13.21*TFLOPS, fp16=26.42*TFLOPS, int8=52.84*TFLOPS), - "AMD Radeon RX 6700": DeviceFlops(fp32=11.4*TFLOPS, fp16=22.8*TFLOPS, int8=45.6*TFLOPS), - "AMD Radeon RX 6600 XT": DeviceFlops(fp32=10.6*TFLOPS, fp16=21.2*TFLOPS, int8=42.4*TFLOPS), - "AMD Radeon RX 6600": DeviceFlops(fp32=8.93*TFLOPS, fp16=17.86*TFLOPS, int8=35.72*TFLOPS), - "AMD Radeon RX 6500 XT": DeviceFlops(fp32=5.77*TFLOPS, fp16=11.54*TFLOPS, int8=23.08*TFLOPS), - "AMD Radeon RX 6400": DeviceFlops(fp32=3.57*TFLOPS, fp16=7.14*TFLOPS, int8=14.28*TFLOPS), - # RX 7000 series - "AMD Radeon RX 7900 XTX": DeviceFlops(fp32=61.4*TFLOPS, fp16=122.8*TFLOPS, int8=245.6*TFLOPS), - "AMD Radeon RX 7900 XT": DeviceFlops(fp32=53.4*TFLOPS, fp16=106.8*TFLOPS, int8=213.6*TFLOPS), - "AMD Radeon RX 7800 XT": DeviceFlops(fp32=42.6*TFLOPS, fp16=85.2*TFLOPS, int8=170.4*TFLOPS), - "AMD Radeon RX 7700 XT": DeviceFlops(fp32=34.2*TFLOPS, fp16=68.4*TFLOPS, int8=136.8*TFLOPS), - "AMD Radeon RX 7600": DeviceFlops(fp32=21.5*TFLOPS, fp16=43.0*TFLOPS, int8=86.0*TFLOPS), - "AMD Radeon RX 7500": DeviceFlops(fp32=16.2*TFLOPS, fp16=32.4*TFLOPS, int8=64.8*TFLOPS), - ### Qualcomm embedded chips: TODO -} -CHIP_FLOPS.update({f"LAPTOP GPU {key}": value for key, value in CHIP_FLOPS.items()}) -CHIP_FLOPS.update({f"Laptop GPU {key}": value for key, value in CHIP_FLOPS.items()}) -CHIP_FLOPS.update({f"{key} LAPTOP GPU": value for key, value in CHIP_FLOPS.items()}) -CHIP_FLOPS.update({f"{key} Laptop GPU": value for key, value in CHIP_FLOPS.items()}) - - -def device_capabilities() -> DeviceCapabilities: - if psutil.MACOS: - return mac_device_capabilities() - elif psutil.LINUX: - return linux_device_capabilities() - else: - return DeviceCapabilities( - model="Unknown Device", - chip="Unknown Chip", - memory=psutil.virtual_memory().total // 2**20, - flops=DeviceFlops(fp32=0, fp16=0, int8=0), - ) - - -def mac_device_capabilities() -> DeviceCapabilities: - # Fetch the model of the Mac using system_profiler - model = subprocess.check_output(["system_profiler", "SPHardwareDataType"]).decode("utf-8") - model_line = next((line for line in model.split("\n") if "Model Name" in line), None) - model_id = model_line.split(": ")[1] if model_line else "Unknown Model" - chip_line = next((line for line in model.split("\n") if "Chip" in line), None) - chip_id = chip_line.split(": ")[1] if chip_line else "Unknown Chip" - memory_line = next((line for line in model.split("\n") if "Memory" in line), None) - memory_str = memory_line.split(": ")[1] if memory_line else "Unknown Memory" - memory_units = memory_str.split() - memory_value = int(memory_units[0]) - if memory_units[1] == "GB": - memory = memory_value*1024 - else: - memory = memory_value - - # Assuming static values for other attributes for demonstration - return DeviceCapabilities(model=model_id, chip=chip_id, memory=memory, flops=CHIP_FLOPS.get(chip_id, DeviceFlops(fp32=0, fp16=0, int8=0))) - - -def linux_device_capabilities() -> DeviceCapabilities: - import psutil - from tinygrad import Device - - if DEBUG >= 2: print(f"tinygrad {Device.DEFAULT=}") - if Device.DEFAULT == "CUDA" or Device.DEFAULT == "NV" or Device.DEFAULT == "GPU": - import pynvml - - pynvml.nvmlInit() - handle = 
pynvml.nvmlDeviceGetHandleByIndex(0) - gpu_name = pynvml.nvmlDeviceGetName(handle).upper() - gpu_memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle) - - if DEBUG >= 2: print(f"NVIDIA device {gpu_name=} {gpu_memory_info=}") - - return DeviceCapabilities( - model=f"Linux Box ({gpu_name})", - chip=gpu_name, - memory=gpu_memory_info.total // 2**20, - flops=CHIP_FLOPS.get(gpu_name, DeviceFlops(fp32=0, fp16=0, int8=0)), - ) - elif Device.DEFAULT == "AMD": - # TODO AMD support - return DeviceCapabilities( - model="Linux Box (AMD)", - chip="Unknown AMD", - memory=psutil.virtual_memory().total // 2**20, - flops=DeviceFlops(fp32=0, fp16=0, int8=0), - ) - else: - return DeviceCapabilities( - model=f"Linux Box (Device: {Device.DEFAULT})", - chip=f"Unknown Chip (Device: {Device.DEFAULT})", - memory=psutil.virtual_memory().total // 2**20, - flops=DeviceFlops(fp32=0, fp16=0, int8=0), - ) diff --git a/build/lib/exo/topology/partitioning_strategy.py b/build/lib/exo/topology/partitioning_strategy.py deleted file mode 100644 index 29c3dc6a9..000000000 --- a/build/lib/exo/topology/partitioning_strategy.py +++ /dev/null @@ -1,40 +0,0 @@ -from abc import ABC, abstractmethod -from typing import List -from dataclasses import dataclass -from .topology import Topology -from exo.inference.shard import Shard - - -# Partitions shard-space into pieces of contiguous shards, represented by floating point range [start, end) between 0 and 1 -@dataclass -class Partition: - node_id: str - start: float - end: float - - -class PartitioningStrategy(ABC): - @abstractmethod - def partition(self, topology: Topology) -> List[Partition]: - pass - - -def map_partitions_to_shards(partitions: List[Partition], num_layers: int, model_id: str) -> List[Shard]: - shards = [] - for i, partition in enumerate(partitions): - start_layer = int(partition.start*num_layers) - end_layer = int(partition.end*num_layers) - 1 - - # Ensure the last partition covers up to num_layers - 1 - if i == len(partitions) - 1: - end_layer = num_layers - 1 - - # Ensure no empty shards - if start_layer <= end_layer: - shards.append(Shard(model_id, start_layer, end_layer, num_layers)) - - # Ensure full coverage - if shards and shards[-1].end_layer < num_layers - 1: - shards[-1] = Shard(model_id, shards[-1].start_layer, num_layers - 1, num_layers) - - return shards diff --git a/build/lib/exo/topology/ring_memory_weighted_partitioning_strategy.py b/build/lib/exo/topology/ring_memory_weighted_partitioning_strategy.py deleted file mode 100644 index 6550aeb19..000000000 --- a/build/lib/exo/topology/ring_memory_weighted_partitioning_strategy.py +++ /dev/null @@ -1,18 +0,0 @@ -from typing import List -from .partitioning_strategy import PartitioningStrategy -from .topology import Topology -from .partitioning_strategy import Partition - - -class RingMemoryWeightedPartitioningStrategy(PartitioningStrategy): - def partition(self, topology: Topology) -> List[Partition]: - nodes = list(topology.all_nodes()) - nodes.sort(key=lambda x: (x[1].memory, x[0]), reverse=True) - total_memory = sum(node[1].memory for node in nodes) - partitions = [] - start = 0 - for node in nodes: - end = round(start + (node[1].memory/total_memory), 5) - partitions.append(Partition(node[0], start, end)) - start = end - return partitions diff --git a/build/lib/exo/topology/test_device_capabilities.py b/build/lib/exo/topology/test_device_capabilities.py deleted file mode 100644 index 5f8b4c3ac..000000000 --- a/build/lib/exo/topology/test_device_capabilities.py +++ /dev/null @@ -1,91 +0,0 @@ -import unittest 
-from unittest.mock import patch -from exo.topology.device_capabilities import mac_device_capabilities, DeviceCapabilities, DeviceFlops, TFLOPS - - -class TestMacDeviceCapabilities(unittest.TestCase): - @patch("subprocess.check_output") - def test_mac_device_capabilities_pro(self, mock_check_output): - # Mock the subprocess output - mock_check_output.return_value = b""" -Hardware: - -Hardware Overview: - -Model Name: MacBook Pro -Model Identifier: Mac15,9 -Model Number: Z1CM000EFB/A -Chip: Apple M3 Max -Total Number of Cores: 16 (12 performance and 4 efficiency) -Memory: 128 GB -System Firmware Version: 10000.000.0 -OS Loader Version: 10000.000.0 -Serial Number (system): XXXXXXXXXX -Hardware UUID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX -Provisioning UDID: XXXXXXXX-XXXXXXXXXXXXXXXX -Activation Lock Status: Enabled -""" - - # Call the function - result = mac_device_capabilities() - - # Check the results - self.assertIsInstance(result, DeviceCapabilities) - self.assertEqual(result.model, "MacBook Pro") - self.assertEqual(result.chip, "Apple M3 Max") - self.assertEqual(result.memory, 131072) # 16 GB in MB - self.assertEqual( - str(result), - "Model: MacBook Pro. Chip: Apple M3 Max. Memory: 131072MB. Flops: 14.20 TFLOPS, fp16: 28.40 TFLOPS, int8: 56.80 TFLOPS", - ) - - @patch("subprocess.check_output") - def test_mac_device_capabilities_air(self, mock_check_output): - # Mock the subprocess output - mock_check_output.return_value = b""" -Hardware: - -Hardware Overview: - -Model Name: MacBook Air -Model Identifier: Mac14,2 -Model Number: MLY33B/A -Chip: Apple M2 -Total Number of Cores: 8 (4 performance and 4 efficiency) -Memory: 8 GB -System Firmware Version: 10000.00.0 -OS Loader Version: 10000.00.0 -Serial Number (system): XXXXXXXXXX -Hardware UUID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX -Provisioning UDID: XXXXXXXX-XXXXXXXXXXXXXXXX -Activation Lock Status: Disabled -""" - - # Call the function - result = mac_device_capabilities() - - # Check the results - self.assertIsInstance(result, DeviceCapabilities) - self.assertEqual(result.model, "MacBook Air") - self.assertEqual(result.chip, "Apple M2") - self.assertEqual(result.memory, 8192) # 8 GB in MB - - @unittest.skip("Unskip this test when running on a MacBook Pro, Apple M3 Max, 128GB") - def test_mac_device_capabilities_real(self): - # Call the function without mocking - result = mac_device_capabilities() - - # Check the results - self.assertIsInstance(result, DeviceCapabilities) - self.assertEqual(result.model, "MacBook Pro") - self.assertEqual(result.chip, "Apple M3 Max") - self.assertEqual(result.memory, 131072) # 128 GB in MB - self.assertEqual(result.flops, DeviceFlops(fp32=14.20*TFLOPS, fp16=28.40*TFLOPS, int8=56.80*TFLOPS)) - self.assertEqual( - str(result), - "Model: MacBook Pro. Chip: Apple M3 Max. Memory: 131072MB. 
Flops: 14.20 TFLOPS, fp16: 28.40 TFLOPS, int8: 56.80 TFLOPS", - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/build/lib/exo/topology/test_map_partitions.py b/build/lib/exo/topology/test_map_partitions.py deleted file mode 100644 index 5254915e6..000000000 --- a/build/lib/exo/topology/test_map_partitions.py +++ /dev/null @@ -1,81 +0,0 @@ -import unittest -from typing import List -from exo.topology.partitioning_strategy import Partition, map_partitions_to_shards -from exo.inference.shard import Shard - - -class TestRingMemoryWeightedPartitioningStrategy(unittest.TestCase): - def test_map_partitions_to_shards(self): - partitions = [ - Partition("node1", 0.0, 0.42857), - Partition("node2", 0.42857, 0.71428), - Partition("node3", 0.71428, 0.99999), - ] - shards = map_partitions_to_shards(partitions, 32, "model") - self.assertEqual( - shards, - [ - Shard("model", 0, 12, 32), - Shard("model", 13, 21, 32), - Shard("model", 22, 31, 32), - ], - ) - - partitions = [ - Partition("node1", 0.0, 0.1), - Partition("node2", 0.1, 0.2), - Partition("node3", 0.2, 1.0), - ] - shards = map_partitions_to_shards(partitions, 32, "model") - self.assertEqual( - shards, - [ - Shard("model", 0, 2, 32), - Shard("model", 3, 5, 32), - Shard("model", 6, 31, 32), - ], - ) - - partitions = [ - Partition("node1", 0.0, 1.0), - ] - shards = map_partitions_to_shards(partitions, 32, "model") - self.assertEqual( - shards, - [ - Shard("model", 0, 31, 32), - ], - ) - - partitions = [] - shards = map_partitions_to_shards(partitions, 32, "model") - self.assertEqual(shards, []) - - def test_broken_map_partitions_to_shards(self): - # this was an old broken implementation that sometimes had rounding errors! - def _broken_map_partitions_to_shards(partitions: List[Partition], num_layers, model_id: str): - shards = [] - for i, partition in enumerate(partitions): - start_layer = int(partition.start*num_layers) - end_layer = int(partition.end*num_layers) - 1 - shards.append(Shard(model_id, start_layer, end_layer, num_layers)) - return shards - - partitions = [ - Partition("node1", 0.0, 0.42857), - Partition("node2", 0.42857, 0.71428), - Partition("node3", 0.71428, 0.99999), - ] - shards = _broken_map_partitions_to_shards(partitions, 32, "model") - self.assertEqual( - shards, - [ - Shard("model", 0, 12, 32), - Shard("model", 13, 21, 32), - Shard("model", 22, 30, 32), - ], - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/build/lib/exo/topology/test_ring_memory_weighted_partitioning_strategy.py b/build/lib/exo/topology/test_ring_memory_weighted_partitioning_strategy.py deleted file mode 100644 index fd466f367..000000000 --- a/build/lib/exo/topology/test_ring_memory_weighted_partitioning_strategy.py +++ /dev/null @@ -1,90 +0,0 @@ -import unittest -from exo.topology.ring_memory_weighted_partitioning_strategy import RingMemoryWeightedPartitioningStrategy -from exo.topology.topology import Topology -from exo.topology.device_capabilities import DeviceCapabilities, DeviceFlops -from exo.topology.partitioning_strategy import Partition - - -class TestRingMemoryWeightedPartitioningStrategy(unittest.TestCase): - def test_partition(self): - # triangle - # node1 -> node2 -> node3 -> node1 - topology = Topology() - topology.update_node( - "node1", - DeviceCapabilities(model="test1", chip="test1", memory=3000, flops=DeviceFlops(fp32=0, fp16=0, int8=0)), - ) - topology.update_node( - "node2", - DeviceCapabilities(model="test2", chip="test2", memory=1000, flops=DeviceFlops(fp32=0, fp16=0, int8=0)), - ) - 
topology.update_node( - "node3", - DeviceCapabilities(model="test3", chip="test3", memory=6000, flops=DeviceFlops(fp32=0, fp16=0, int8=0)), - ) - topology.add_edge("node1", "node2") - topology.add_edge("node2", "node3") - topology.add_edge("node3", "node1") - topology.add_edge("node1", "node3") - - strategy = RingMemoryWeightedPartitioningStrategy() - partitions = strategy.partition(topology) - - self.assertEqual(len(partitions), 3) - self.assertEqual( - partitions, - [ - Partition("node3", 0.0, 0.6), - Partition("node1", 0.6, 0.9), - Partition("node2", 0.9, 1.0), - ], - ) - - def test_partition_rounding(self): - # triangle - # node1 -> node2 -> node3 -> node1 - topology = Topology() - topology.update_node( - "node1", - DeviceCapabilities( - model="MacBook Pro", - chip="test1", - memory=128*1024*1024*1024, - flops=DeviceFlops(fp32=0, fp16=0, int8=0), - ), - ) - topology.update_node( - "node2", - DeviceCapabilities( - model="Mac Studio", - chip="test2", - memory=192*1024*1024*1024, - flops=DeviceFlops(fp32=0, fp16=0, int8=0), - ), - ) - topology.update_node( - "node3", - DeviceCapabilities( - model="MacBook Pro", - chip="test3", - memory=128*1024*1024*1024, - flops=DeviceFlops(fp32=0, fp16=0, int8=0), - ), - ) - - strategy = RingMemoryWeightedPartitioningStrategy() - partitions = strategy.partition(topology) - - self.assertEqual(len(partitions), 3) - self.assertEqual( - partitions, - [ - Partition("node3", 0.0, 0.42857), - Partition("node1", 0.6, 0.9), - Partition("node2", 0.9, 1.0), - ], - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/build/lib/exo/topology/topology.py b/build/lib/exo/topology/topology.py deleted file mode 100644 index 46b512e50..000000000 --- a/build/lib/exo/topology/topology.py +++ /dev/null @@ -1,49 +0,0 @@ -from .device_capabilities import DeviceCapabilities -from typing import Dict, Set, Optional - - -class Topology: - def __init__(self): - self.nodes: Dict[str, DeviceCapabilities] = {} # Maps node IDs to DeviceCapabilities - self.peer_graph: Dict[str, Set[str]] = {} # Adjacency list representing the graph - self.active_node_id: Optional[str] = None - - def update_node(self, node_id: str, device_capabilities: DeviceCapabilities): - self.nodes[node_id] = device_capabilities - - def get_node(self, node_id: str) -> DeviceCapabilities: - return self.nodes.get(node_id) - - def all_nodes(self): - return self.nodes.items() - - def add_edge(self, node1_id: str, node2_id: str): - if node1_id not in self.peer_graph: - self.peer_graph[node1_id] = set() - if node2_id not in self.peer_graph: - self.peer_graph[node2_id] = set() - self.peer_graph[node1_id].add(node2_id) - self.peer_graph[node2_id].add(node1_id) - - def get_neighbors(self, node_id: str) -> Set[str]: - return self.peer_graph.get(node_id, set()) - - def all_edges(self): - edges = [] - for node, neighbors in self.peer_graph.items(): - for neighbor in neighbors: - if (neighbor, node) not in edges: # Avoid duplicate edges - edges.append((node, neighbor)) - return edges - - def merge(self, other: "Topology"): - for node_id, capabilities in other.nodes.items(): - self.update_node(node_id, capabilities) - for node_id, neighbors in other.peer_graph.items(): - for neighbor in neighbors: - self.add_edge(node_id, neighbor) - - def __str__(self): - nodes_str = ", ".join(f"{node_id}: {cap}" for node_id, cap in self.nodes.items()) - edges_str = ", ".join(f"{node}: {neighbors}" for node, neighbors in self.peer_graph.items()) - return f"Topology(Nodes: {{{nodes_str}}}, Edges: {{{edges_str}}})" diff --git 
a/build/lib/exo/viz/__init__.py b/build/lib/exo/viz/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/build/lib/exo/viz/test_topology_viz.py b/build/lib/exo/viz/test_topology_viz.py deleted file mode 100644 index e57de1ae3..000000000 --- a/build/lib/exo/viz/test_topology_viz.py +++ /dev/null @@ -1,129 +0,0 @@ -import asyncio -import unittest -from datetime import timedelta -from exo.viz.topology_viz import TopologyViz -from exo.topology.topology import Topology -from exo.topology.device_capabilities import DeviceCapabilities, DeviceFlops -from exo.topology.partitioning_strategy import Partition -from exo.download.hf.hf_helpers import RepoProgressEvent, RepoFileProgressEvent - - -def create_hf_repo_progress_event( - completed_files: int = 5, - total_files: int = 10, - downloaded_bytes: int = 500000000, - downloaded_bytes_this_session: int = 250000000, - total_bytes: int = 1000000000, - overall_speed: int = 5000000, - overall_eta: timedelta = timedelta(seconds=100), - file_progress: dict = None, - status: str = "in_progress" -) -> RepoProgressEvent: - if file_progress is None: - file_progress = { - "file1.bin": - RepoFileProgressEvent( - repo_id="repo_id", - repo_revision="repo_revision", - file_path="file1.bin", - downloaded=100000000, - downloaded_this_session=50000000, - total=200000000, - speed=1000000, - eta=timedelta(seconds=100), - status="in_progress" - ), "file2.bin": - RepoFileProgressEvent( - repo_id="repo_id", - repo_revision="repo_revision", - file_path="file2.bin", - downloaded=200000000, - downloaded_this_session=100000000, - total=200000000, - speed=2000000, - eta=timedelta(seconds=0), - status="complete" - ) - } - - return RepoProgressEvent( - repo_id="repo_id", - repo_revision="repo_revision", - completed_files=completed_files, - total_files=total_files, - downloaded_bytes=downloaded_bytes, - downloaded_bytes_this_session=downloaded_bytes_this_session, - total_bytes=total_bytes, - overall_speed=overall_speed, - overall_eta=overall_eta, - file_progress=file_progress, - status=status - ) - - -class TestNodeViz(unittest.IsolatedAsyncioTestCase): - async def asyncSetUp(self): - self.topology = Topology() - self.topology.update_node( - "node1", - DeviceCapabilities(model="ModelA", chip="ChipA", memory=8*1024, flops=DeviceFlops(fp32=1.0, fp16=2.0, int8=4.0)), - ) - self.topology.update_node( - "node2", - DeviceCapabilities(model="ModelB", chip="ChipB", memory=16*1024, flops=DeviceFlops(fp32=2.0, fp16=4.0, int8=8.0)), - ) - self.topology.update_node( - "node3", - DeviceCapabilities(model="ModelC", chip="ChipC", memory=32*1024, flops=DeviceFlops(fp32=4.0, fp16=8.0, int8=16.0)), - ) - self.topology.update_node( - "node4", - DeviceCapabilities(model="ModelD", chip="ChipD", memory=64*1024, flops=DeviceFlops(fp32=8.0, fp16=16.0, int8=32.0)), - ) - - self.top_viz = TopologyViz() - await asyncio.sleep(2) # Simulate running for a short time - - async def test_layout_generation(self): - # self.top_viz._generate_layout() - self.top_viz.refresh() - import time - - time.sleep(2) - self.top_viz.update_visualization( - self.topology, - [ - Partition("node1", 0, 0.2), - Partition("node4", 0.2, 0.4), - Partition("node2", 0.4, 0.8), - Partition("node3", 0.8, 0.9), - ], - "node1", - { - "node1": create_hf_repo_progress_event(), - "node2": create_hf_repo_progress_event(), - "node3": create_hf_repo_progress_event(), - "node4": create_hf_repo_progress_event(), - }, - ) - time.sleep(2) - self.topology.active_node_id = "node3" - self.top_viz.update_visualization( - 
self.topology, - [ - Partition("node1", 0, 0.3), - Partition("node5", 0.3, 0.5), - Partition("node2", 0.5, 0.7), - Partition("node4", 0.7, 0.9), - ], - "node5", - { - "node1": create_hf_repo_progress_event(), - "node5": create_hf_repo_progress_event(), - }, - ) - time.sleep(2) - - -if __name__ == "__main__": - unittest.main() diff --git a/build/lib/exo/viz/topology_viz.py b/build/lib/exo/viz/topology_viz.py deleted file mode 100644 index 3664f3783..000000000 --- a/build/lib/exo/viz/topology_viz.py +++ /dev/null @@ -1,307 +0,0 @@ -import math -from collections import OrderedDict -from typing import List, Optional, Tuple, Dict -from exo.helpers import exo_text, pretty_print_bytes, pretty_print_bytes_per_second -from exo.topology.topology import Topology -from exo.topology.partitioning_strategy import Partition -from exo.download.hf.hf_helpers import RepoProgressEvent -from exo.topology.device_capabilities import UNKNOWN_DEVICE_CAPABILITIES -from rich.console import Console, Group -from rich.text import Text -from rich.live import Live -from rich.style import Style -from rich.table import Table -from rich.layout import Layout -from rich.syntax import Syntax -from rich.panel import Panel -from rich.markdown import Markdown - - -class TopologyViz: - def __init__(self, chatgpt_api_endpoints: List[str] = [], web_chat_urls: List[str] = []): - self.chatgpt_api_endpoints = chatgpt_api_endpoints - self.web_chat_urls = web_chat_urls - self.topology = Topology() - self.partitions: List[Partition] = [] - self.node_id = None - self.node_download_progress: Dict[str, RepoProgressEvent] = {} - self.requests: OrderedDict[str, Tuple[str, str]] = {} - - self.console = Console() - self.layout = Layout() - self.layout.split(Layout(name="main"), Layout(name="prompt_output", size=15), Layout(name="download", size=25)) - self.main_panel = Panel(self._generate_main_layout(), title="Exo Cluster (0 nodes)", border_style="bright_yellow") - self.prompt_output_panel = Panel("", title="Prompt and Output", border_style="green") - self.download_panel = Panel("", title="Download Progress", border_style="cyan") - self.layout["main"].update(self.main_panel) - self.layout["prompt_output"].update(self.prompt_output_panel) - self.layout["download"].update(self.download_panel) - - # Initially hide the prompt_output panel - self.layout["prompt_output"].visible = False - self.live_panel = Live(self.layout, auto_refresh=False, console=self.console) - self.live_panel.start() - - def update_visualization(self, topology: Topology, partitions: List[Partition], node_id: Optional[str] = None, node_download_progress: Dict[str, RepoProgressEvent] = {}): - self.topology = topology - self.partitions = partitions - self.node_id = node_id - if node_download_progress: - self.node_download_progress = node_download_progress - self.refresh() - - def update_prompt(self, request_id: str, prompt: Optional[str] = None): - if request_id in self.requests: - self.requests[request_id] = [prompt, self.requests[request_id][1]] - else: - self.requests[request_id] = [prompt, ""] - self.refresh() - - def update_prompt_output(self, request_id: str, output: Optional[str] = None): - if request_id in self.requests: - self.requests[request_id] = [self.requests[request_id][0], output] - else: - self.requests[request_id] = ["", output] - self.refresh() - - def refresh(self): - self.main_panel.renderable = self._generate_main_layout() - # Update the panel title with the number of nodes and partitions - node_count = len(self.topology.nodes) - self.main_panel.title = 
f"Exo Cluster ({node_count} node{'s' if node_count != 1 else ''})" - - # Update and show/hide prompt and output panel - if any(r[0] or r[1] for r in self.requests.values()): - self.prompt_output_panel = self._generate_prompt_output_layout() - self.layout["prompt_output"].update(self.prompt_output_panel) - self.layout["prompt_output"].visible = True - else: - self.layout["prompt_output"].visible = False - - # Only show download_panel if there are in-progress downloads - if any(progress.status == "in_progress" for progress in self.node_download_progress.values()): - self.download_panel.renderable = self._generate_download_layout() - self.layout["download"].visible = True - else: - self.layout["download"].visible = False - - self.live_panel.update(self.layout, refresh=True) - - def _generate_prompt_output_layout(self) -> Panel: - content = [] - requests = list(self.requests.values())[-3:] # Get the 3 most recent requests - max_width = self.console.width - 6 # Full width minus padding and icon - max_lines = 13 # Maximum number of lines for the entire panel content - - for (prompt, output) in reversed(requests): - prompt_icon, output_icon = "💬️", "🤖" - - # Process prompt - prompt_lines = prompt.split('\n') - if len(prompt_lines) > max_lines // 2: - prompt_lines = prompt_lines[:max_lines//2 - 1] + ['...'] - prompt_text = Text(f"{prompt_icon} ", style="bold bright_blue") - prompt_text.append('\n'.join(line[:max_width] for line in prompt_lines), style="white") - - # Process output - output_lines = output.split('\n') - remaining_lines = max_lines - len(prompt_lines) - 2 # -2 for spacing - if len(output_lines) > remaining_lines: - output_lines = output_lines[:remaining_lines - 1] + ['...'] - output_text = Text(f"\n{output_icon} ", style="bold bright_magenta") - output_text.append('\n'.join(line[:max_width] for line in output_lines), style="white") - - content.append(prompt_text) - content.append(output_text) - content.append(Text()) # Empty line between entries - - return Panel( - Group(*content), - title="", - border_style="cyan", - height=15, # Increased height to accommodate multiple lines - expand=True # Allow the panel to expand to full width - ) - - def _generate_main_layout(self) -> str: - # Calculate visualization parameters - num_partitions = len(self.partitions) - radius_x = 30 - radius_y = 12 - center_x, center_y = 50, 24 # Increased center_y to add more space - - # Generate visualization - visualization = [[" " for _ in range(100)] for _ in range(48)] # Increased height to 48 - - # Add exo_text at the top in bright yellow - exo_lines = exo_text.split("\n") - yellow_style = Style(color="bright_yellow") - max_line_length = max(len(line) for line in exo_lines) - for i, line in enumerate(exo_lines): - centered_line = line.center(max_line_length) - start_x = (100-max_line_length) // 2 + 15 - colored_line = Text(centered_line, style=yellow_style) - for j, char in enumerate(str(colored_line)): - if 0 <= start_x + j < 100 and i < len(visualization): - visualization[i][start_x + j] = char - - # Display chatgpt_api_endpoints and web_chat_urls - info_lines = [] - if len(self.web_chat_urls) > 0: - info_lines.append(f"Web Chat URL (tinychat): {' '.join(self.web_chat_urls[:1])}") - if len(self.chatgpt_api_endpoints) > 0: - info_lines.append(f"ChatGPT API endpoint: {' '.join(self.chatgpt_api_endpoints[:1])}") - - info_start_y = len(exo_lines) + 1 - for i, line in enumerate(info_lines): - start_x = (100 - len(line)) // 2 + 15 - for j, char in enumerate(line): - if 0 <= start_x + j < 100 and info_start_y 
+ i < 48: - visualization[info_start_y + i][start_x + j] = char - - # Calculate total FLOPS and position on the bar - total_flops = sum(self.topology.nodes.get(partition.node_id, UNKNOWN_DEVICE_CAPABILITIES).flops.fp16 for partition in self.partitions) - bar_pos = (math.tanh(total_flops/20 - 2) + 1)/2 - - # Add GPU poor/rich bar - bar_width = 30 - bar_start_x = (100-bar_width) // 2 - bar_y = info_start_y + len(info_lines) + 1 - - # Create a gradient bar using emojis - gradient_bar = Text() - emojis = ["🟥", "🟧", "🟨", "🟩"] - for i in range(bar_width): - emoji_index = min(int(i/(bar_width/len(emojis))), len(emojis) - 1) - gradient_bar.append(emojis[emoji_index]) - - # Add the gradient bar to the visualization - visualization[bar_y][bar_start_x - 1] = "[" - visualization[bar_y][bar_start_x + bar_width] = "]" - for i, segment in enumerate(str(gradient_bar)): - visualization[bar_y][bar_start_x + i] = segment - - # Add labels - visualization[bar_y - 1][bar_start_x - 10:bar_start_x - 3] = "GPU poor" - visualization[bar_y - 1][bar_start_x + bar_width*2 + 2:bar_start_x + bar_width*2 + 11] = "GPU rich" - - # Add position indicator and FLOPS value - pos_x = bar_start_x + int(bar_pos*bar_width) - flops_str = f"{total_flops:.2f} TFLOPS" - visualization[bar_y - 1][pos_x] = "▼" - visualization[bar_y + 1][pos_x - len(flops_str) // 2:pos_x + len(flops_str) // 2 + len(flops_str) % 2] = flops_str - visualization[bar_y + 2][pos_x] = "▲" - - # Add an extra empty line for spacing - bar_y += 4 - - for i, partition in enumerate(self.partitions): - device_capabilities = self.topology.nodes.get(partition.node_id, UNKNOWN_DEVICE_CAPABILITIES) - - angle = 2*math.pi*i/num_partitions - x = int(center_x + radius_x*math.cos(angle)) - y = int(center_y + radius_y*math.sin(angle)) - - # Place node with different color for active node and this node - if partition.node_id == self.topology.active_node_id: - visualization[y][x] = "🔴" - elif partition.node_id == self.node_id: - visualization[y][x] = "🟢" - else: - visualization[y][x] = "🔵" - - # Place node info (model, memory, TFLOPS, partition) on three lines - node_info = [ - f"{device_capabilities.model} {device_capabilities.memory // 1024}GB", - f"{device_capabilities.flops.fp16}TFLOPS", - f"[{partition.start:.2f}-{partition.end:.2f}]", - ] - - # Calculate info position based on angle - info_distance_x = radius_x + 6 - info_distance_y = radius_y + 3 - info_x = int(center_x + info_distance_x*math.cos(angle)) - info_y = int(center_y + info_distance_y*math.sin(angle)) - - # Adjust text position to avoid overwriting the node icon and prevent cutoff - if info_x < x: - info_x = max(0, x - len(max(node_info, key=len)) - 1) - elif info_x > x: - info_x = min(99 - len(max(node_info, key=len)), info_x) - - # Adjust for top and bottom nodes - if 5*math.pi/4 < angle < 7*math.pi/4: - info_x += 4 - elif math.pi/4 < angle < 3*math.pi/4: - info_x += 3 - info_y -= 2 - - for j, line in enumerate(node_info): - for k, char in enumerate(line): - if 0 <= info_y + j < 48 and 0 <= info_x + k < 100: - if info_y + j != y or info_x + k != x: - visualization[info_y + j][info_x + k] = char - - # Draw line to next node - next_i = (i+1) % num_partitions - next_angle = 2*math.pi*next_i/num_partitions - next_x = int(center_x + radius_x*math.cos(next_angle)) - next_y = int(center_y + radius_y*math.sin(next_angle)) - - # Simple line drawing - steps = max(abs(next_x - x), abs(next_y - y)) - for step in range(1, steps): - line_x = int(x + (next_x-x)*step/steps) - line_y = int(y + (next_y-y)*step/steps) - if 0 <= 
line_y < 48 and 0 <= line_x < 100: - visualization[line_y][line_x] = "-" - - # Convert to string - return "\n".join("".join(str(char) for char in row) for row in visualization) - - def _generate_download_layout(self) -> Table: - summary = Table(show_header=False, box=None, padding=(0, 1), expand=True) - summary.add_column("Info", style="cyan", no_wrap=True, ratio=50) - summary.add_column("Progress", style="cyan", no_wrap=True, ratio=40) - summary.add_column("Percentage", style="cyan", no_wrap=True, ratio=10) - - # Current node download progress - if self.node_id in self.node_download_progress: - download_progress = self.node_download_progress[self.node_id] - title = f"Downloading model {download_progress.repo_id}@{download_progress.repo_revision} ({download_progress.completed_files}/{download_progress.total_files}):" - summary.add_row(Text(title, style="bold")) - progress_info = f"{pretty_print_bytes(download_progress.downloaded_bytes)} / {pretty_print_bytes(download_progress.total_bytes)} ({pretty_print_bytes_per_second(download_progress.overall_speed)})" - summary.add_row(progress_info) - - eta_info = f"{download_progress.overall_eta}" - summary.add_row(eta_info) - - summary.add_row("") # Empty row for spacing - - for file_path, file_progress in download_progress.file_progress.items(): - if file_progress.status != "complete": - progress = int(file_progress.downloaded/file_progress.total*30) - bar = f"[{'=' * progress}{' ' * (30 - progress)}]" - percentage = f"{file_progress.downloaded / file_progress.total * 100:.0f}%" - summary.add_row(Text(file_path[:30], style="cyan"), bar, percentage) - - summary.add_row("") # Empty row for spacing - - # Other nodes download progress summary - summary.add_row(Text("Other Nodes Download Progress:", style="bold")) - for node_id, progress in self.node_download_progress.items(): - if node_id != self.node_id: - device = self.topology.nodes.get(node_id) - partition = next((p for p in self.partitions if p.node_id == node_id), None) - partition_info = f"[{partition.start:.2f}-{partition.end:.2f}]" if partition else "" - percentage = progress.downloaded_bytes/progress.total_bytes*100 if progress.total_bytes > 0 else 0 - speed = pretty_print_bytes_per_second(progress.overall_speed) - device_info = f"{device.model if device else 'Unknown Device'} {device.memory // 1024 if device else '?'}GB {partition_info}" - progress_info = f"{progress.repo_id}@{progress.repo_revision} ({speed})" - progress_bar = f"[{'=' * int(percentage // 3.33)}{' ' * (30 - int(percentage // 3.33))}]" - percentage_str = f"{percentage:.1f}%" - eta_str = f"{progress.overall_eta}" - summary.add_row(device_info, progress_info, percentage_str) - summary.add_row("", progress_bar, eta_str) - - return summary diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 4f52c08d6..8a274c5ed 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -3,6 +3,7 @@ Written with pytorch using torchtune and other methods """ +import re from typing import Optional, Any, Tuple, List, Union, Callable @@ -134,6 +135,7 @@ def forward( *, mask: Optional[_MaskType] = None, input_pos: Optional[torch.Tensor] = None, + hidden_state: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor, List[torch.Tensor]]: # Determine the type of input and shape if DEBUG >= 4: @@ -142,8 +144,8 @@ def forward( print(f"mask: {mask}") print(f"input_pos: {input_pos}") - if tokens.ndim == 3: - h = tokens # Use directly as hidden states + if hidden_state is not 
None: + h = hidden_state # Use directly as hidden states else: seq_len = tokens.shape[1] @@ -170,6 +172,7 @@ def forward( # Process through each transformer layer # with torch.no_grad(): if layer.caches_are_enabled(): + self.check_maxed_cache(tokens=h) try: h = layer( h, @@ -195,19 +198,22 @@ def forward( if DEBUG >= 8: print(f"\nhidden layer out H[{i}]->H[{i + 1}]\n{h}\n") - # Apply normalization - h = self.norm(h) + if self.shard.is_last_layer(): + # Apply normalization + h = self.norm(h) - # Handle chunked output if needed - output = self.output(h).float() + # Handle chunked output if needed + output = self.output(h).float() - # Return list if hidden states are requested - output = [hidden[-1], output] if hidden else output + if DEBUG >= 4: + print(f"\n\noutput {output}\n\n") - if DEBUG >= 4: - print(f"\n\noutput {output}\n\n") + return output + else: + if DEBUG >= 4: + print(f"\n\nhidden output {hidden[-1]}\n\n") - return output + return hidden[-1] def LlamaModel(config: dict, shard: Shard): @@ -278,7 +284,11 @@ def LlamaModel(config: dict, shard: Shard): layers = nn.ModuleList(layers) tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) - output_proj = ttm.TiedLinear(tok_embeddings) + + if len(re.findall(r"3\.2", shard.model_id)) > 0: + output_proj = ttm.TiedLinear(tok_embeddings) + else: + output_proj = nn.Linear(config["embed_dim"], config["vocab_size"], bias=False) norm = RMSNorm(config["embed_dim"], eps=config["norm_eps"]) @@ -302,7 +312,7 @@ def __init__( shard: Shard, device: Optional[torch.device] = None, use_cache: Optional[bool] = False, - max_generated_tokens: int = 300, + max_generated_tokens: int = 1024, ): super(ShardedLlamaModel, self).__init__() @@ -318,6 +328,10 @@ def __init__( self.model = LlamaModel(config, self.shard).to(dtype=self.dtype, device=self.device) + if DEBUG >= 4: + print("ShardedLlamaModel called") + print(f"self.model {self.model}") + # keep track of current position in generation self.max_generated_tokens = max_generated_tokens self.curr_pos = 0 @@ -344,6 +358,9 @@ def generate( print(f"hidden_state: {hidden_state}") print(f"curr_pos: {self.curr_pos}") + model_hs = None + model_logits = None + bsz, tokens_length = tokens.size() if tokens_length > 1: @@ -371,11 +388,8 @@ def generate( else: max_seq_len = self.model.decoder_max_cache_seq_len - # clone tokens - generated_tokens = tokens.clone().to(device=self.device) - # masking for proper attention - padding_masks = generated_tokens != self.pad_id + padding_masks = tokens != self.pad_id if not padding_masks.all(): padding_masks = torch.nn.functional.pad( padding_masks, @@ -420,9 +434,10 @@ def generate( print(f"input_pos: {self.curr_input_pos}") model_output = self.model( - tokens=hidden_state if hidden_state is not None else tokens, + tokens=tokens, mask=self.curr_masks, input_pos=self.curr_input_pos, + hidden_state=hidden_state, ) self.curr_pos += 1 @@ -430,12 +445,9 @@ def generate( if DEBUG >= 4: print(f"model_output\n{model_output}") - if isinstance(model_output, list): - model_logits = model_output[1] - model_output.pop() # remove logits - model_hs = model_output[0] # get last hidden state - else: + if self.shard.is_last_layer(): model_logits = model_output - model_hs = None + else: + model_hs = model_output return model_hs, model_logits diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index cd6c86ebe..60da1a002 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ 
b/exo/inference/torch/sharded_inference_engine.py @@ -8,6 +8,7 @@ from concurrent.futures import ThreadPoolExecutor import asyncio import uuid +import re from typing import Optional import numpy as np @@ -28,8 +29,10 @@ # supported models from exo.inference.torch.models.llama3 import ShardedLlamaModel -TEMP = 0.0 -TOP_K = 35 +# from torchtune generate recipe +# https://github.com/pytorch/torchtune/blob/main/recipes/configs/generation.yaml#L40 +TEMP = 0.6 +TOP_K = 300 class TorchDynamicShardInferenceEngine(InferenceEngine): @@ -43,6 +46,8 @@ def __init__(self, shard_downloader: HFShardDownloader): self.executor = ThreadPoolExecutor(max_workers=1) self.past_tokens = None self.uuid = str(uuid.uuid4()) + self.model_path = None + self.model_config = None # device settings if os.environ.get("TORCH_DEVICE"): @@ -99,21 +104,30 @@ def sample_wrapper(): if DEBUG >= 4: print(f"tokens: {tokens}") + if tokens.item() == self.tokenizer.eos_token_id: + if self.device == torch.device("cuda"): + torch.cuda.empty_cache() + self.sharded_model = None + self.shard = None return tokens.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(sample_wrapper)) async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray, inference_state: Optional[dict] = None) -> tuple[np.ndarray, Optional[dict]]: + + await self.ensure_shard(shard) + + infer_cached = os.environ.get("TORCH_USE_CACHE", True) + # ensure shard if DEBUG >= 4: print("infer_tensor called") print(f"shard: {shard}") print(f"input_data: {input_data}") print(f"inference_state: {inference_state}") + print(f"infer_cached: {infer_cached}") - await self.ensure_shard(shard) - - if inference_state.get("past_tokens") is not None: + if inference_state.get("past_tokens") is not None and not infer_cached: self.past_tokens = torch.tensor(inference_state["past_tokens"]).to(self.device) self.request_id = request_id if not self.request_id else self.request_id @@ -124,10 +138,11 @@ async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarr elif input_data.ndim == 2: input_tensor = torch.tensor(input_data).to(self.device) - if self.past_tokens is not None: - self.past_tokens = torch.cat([self.past_tokens, input_tensor], dim=-1).to(self.device) - else: - self.past_tokens = input_tensor.clone() + if not infer_cached: + if self.past_tokens is not None: + self.past_tokens = torch.cat([self.past_tokens, input_tensor], dim=-1).to(self.device) + else: + self.past_tokens = input_tensor.clone() def infer_wrapper(): if DEBUG >= 4: @@ -135,18 +150,18 @@ def infer_wrapper(): print(f"self.past_tokens: {self.past_tokens}") print(f"hidden_state: {hidden_state}") + curr_inference_state = {} + if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate(hidden_state=hidden_state) else: - if not self.sharded_model.model.caches_are_enabled(): + if not infer_cached: model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens) + + curr_inference_state["past_tokens"] = self.past_tokens.numpy(force=True).tolist() else: model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor) - curr_inference_state = { - "past_tokens": self.past_tokens.numpy(force=True).tolist(), - } - if model_hs is not None: # model_hs = model_hs.detach().cpu() @@ -167,31 +182,29 @@ async def ensure_shard(self, shard: Shard): print(f"class shard: {self.shard}") print(f"uuid: {self.uuid}") + # reset model after last layer to fix OOM if self.shard == shard: return self.shard = shard # 
download model safetensors and shard - model_path = await self.shard_downloader.ensure_shard(shard, self.__class__.__name__) - model_config = load_model_config(model_path/"config.json") + + self.model_path = await self.shard_downloader.ensure_shard(shard, self.__class__.__name__) + self.model_config = load_model_config(self.model_path/"config.json") # self.tokenizer = await _resolve_tokenizer(model_path) - self.tokenizer = await _resolve_tokenizer(model_path) + self.tokenizer = await _resolve_tokenizer(self.model_path) eot_token = ( self.tokenizer.special_tokens_map.get("eos_token_id") if hasattr(self.tokenizer, "_tokenizer") and isinstance(self.tokenizer._tokenizer, AutoTokenizer) else getattr(self.tokenizer, "eos_token_id", None) ) - print(f"eot_token: {eot_token}") - print(self.tokenizer.special_tokens_map) - print(self.tokenizer.eos_token_id) - self.sharded_model = await asyncio.get_running_loop().run_in_executor( self.executor, functools.partial( ShardedLlamaModel, - config=model_config, + config=self.model_config, shard=shard, device=self.device, use_cache=os.environ.get("TORCH_USE_CACHE", True), @@ -201,7 +214,7 @@ async def ensure_shard(self, shard: Shard): # load sharded weights await asyncio.get_running_loop().run_in_executor( self.executor, - functools.partial(load_model_weights_torchtune, model_path, shard, self.sharded_model), + functools.partial(load_model_weights_torchtune, self.model_path, shard, self.sharded_model), ) async def load_checkpoint(self, shard: Shard, path: str): From 48a75c15e3f57965e4b47fa9d04d8d36cedf29ac Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 14:02:05 -0900 Subject: [PATCH 532/589] updating torchao version in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0920cfe7c..f73dee247 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ "torch==2.5.1", "accelerate==0.34.2", "torchtune==0.5.0", - "torchao==0.6.1", + "torchao==0.8.0", "pytest==8.3.3", "pytest-asyncio==0.24.0", ] From bceeaf58c2ced73796a5864d84fd125a49dd4e25 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 14:24:43 -0900 Subject: [PATCH 533/589] updating for node to node hidden state passing and adding back inference state for node to node --- exo/inference/torch/models/llama3.py | 10 +++-- .../torch/sharded_inference_engine.py | 44 ++++++++++--------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 8a274c5ed..435fb8ca0 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -140,9 +140,13 @@ def forward( # Determine the type of input and shape if DEBUG >= 4: print("forward called") - print(f"tokens [{tokens.shape}]: {tokens}") - print(f"mask: {mask}") - print(f"input_pos: {input_pos}") + if tokens is not None: + print(f"tokens [{tokens.shape}]: {tokens}") + print(f"mask: {mask}") + print(f"input_pos: {input_pos}") + + if hidden_state is not None: + print(f"hidden_state [{hidden_state.shape}]: {hidden_state}") if hidden_state is not None: h = hidden_state # Use directly as hidden states diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 60da1a002..55bc544eb 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -117,7 +117,7 @@ async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarr await 
self.ensure_shard(shard) - infer_cached = os.environ.get("TORCH_USE_CACHE", True) + infer_cached = # ensure shard if DEBUG >= 4: @@ -127,7 +127,7 @@ async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarr print(f"inference_state: {inference_state}") print(f"infer_cached: {infer_cached}") - if inference_state.get("past_tokens") is not None and not infer_cached: + if inference_state.get("past_tokens") is not None: self.past_tokens = torch.tensor(inference_state["past_tokens"]).to(self.device) self.request_id = request_id if not self.request_id else self.request_id @@ -138,11 +138,10 @@ async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarr elif input_data.ndim == 2: input_tensor = torch.tensor(input_data).to(self.device) - if not infer_cached: - if self.past_tokens is not None: - self.past_tokens = torch.cat([self.past_tokens, input_tensor], dim=-1).to(self.device) - else: - self.past_tokens = input_tensor.clone() + if self.past_tokens is not None: + self.past_tokens = torch.cat([self.past_tokens, input_tensor], dim=-1).to(self.device) + else: + self.past_tokens = input_tensor.clone() def infer_wrapper(): if DEBUG >= 4: @@ -150,28 +149,31 @@ def infer_wrapper(): print(f"self.past_tokens: {self.past_tokens}") print(f"hidden_state: {hidden_state}") - curr_inference_state = {} + curr_inference_state = { + "past_tokens": self.past_tokens.numpy(force=True).tolist() + } if hidden_state is not None: - model_hs, model_logits = self.sharded_model.generate(hidden_state=hidden_state) + model_hs, model_logits = self.sharded_model.generate( + tokens=self.past_tokens, + hidden_state=hidden_state, + ) else: - if not infer_cached: + if not os.environ.get("TORCH_USE_CACHE", True): model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens) - - curr_inference_state["past_tokens"] = self.past_tokens.numpy(force=True).tolist() else: model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor) if model_hs is not None: - # model_hs = model_hs.detach().cpu() - - # possibly make this into a tensor that has past_tokens also - # to pass to node, currently only hidden state is - return model_hs.numpy(force=True), curr_inference_state - - # model_logits = model_logits.detach().cpu() - # token = await self.sample(model_logits, TEMP, TOP_K) - return model_logits[:, -1].numpy(force=True), curr_inference_state + return ( + model_hs.numpy(force=True), + curr_inference_state, + ) + + return ( + model_logits[:, -1].numpy(force=True), + curr_inference_state, + ) return await asyncio.get_running_loop().run_in_executor(self.executor, infer_wrapper) From 4f9f0383ffa9e1efe723d40d253b7cf5ca07df9f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 14:25:32 -0900 Subject: [PATCH 534/589] syntax error fix --- exo/inference/torch/sharded_inference_engine.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 55bc544eb..27b8c6cb4 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -117,15 +117,12 @@ async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarr await self.ensure_shard(shard) - infer_cached = - # ensure shard if DEBUG >= 4: print("infer_tensor called") print(f"shard: {shard}") print(f"input_data: {input_data}") print(f"inference_state: {inference_state}") - print(f"infer_cached: {infer_cached}") if 
inference_state.get("past_tokens") is not None: self.past_tokens = torch.tensor(inference_state["past_tokens"]).to(self.device) @@ -149,9 +146,7 @@ def infer_wrapper(): print(f"self.past_tokens: {self.past_tokens}") print(f"hidden_state: {hidden_state}") - curr_inference_state = { - "past_tokens": self.past_tokens.numpy(force=True).tolist() - } + curr_inference_state = {"past_tokens": self.past_tokens.numpy(force=True).tolist()} if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate( @@ -169,7 +164,7 @@ def infer_wrapper(): model_hs.numpy(force=True), curr_inference_state, ) - + return ( model_logits[:, -1].numpy(force=True), curr_inference_state, From 51184d4a2f2da2eda965efa1e2668e73ca2ee540 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 14:51:46 -0900 Subject: [PATCH 535/589] mask and input_pos passing between nodes fix --- exo/inference/torch/models/llama3.py | 11 +++++++++++ exo/inference/torch/sharded_inference_engine.py | 15 ++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 435fb8ca0..cd722864a 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -348,6 +348,7 @@ def generate( self, tokens: Optional[torch.Tensor] = None, hidden_state: Optional[torch.Tensor] = None, + inference_state: Optional[dict] = None, ) -> Tuple[Optional[torch.Tensor], torch.Tensor]: """ Generate logits and/or hidden_states from llama model @@ -414,6 +415,10 @@ def generate( self.input_pos = torch.arange(0, total_response_length, device=self.device).unsqueeze(0) + if inference_state is not None: + inference_state["masks"] = self.masks.numpy(force=True).tolist() + inference_state["input_pos"] = self.input_pos.numpy(force=True).tolist() + if self.model.caches_are_enabled(): self.curr_masks = self.masks[:, :tokens_length] else: @@ -421,6 +426,12 @@ def generate( self.curr_input_pos = self.input_pos[:, :tokens_length].squeeze() else: + if inference_state is not None: + if self.input_pos is None: + self.input_pos = torch.tensor(inference_state["input_pos"]).to(self.device) + if self.masks is None: + self.masks = torch.tensor(inference_state["masks"]).to(self.device) + if self.model.caches_are_enabled(): self.curr_input_pos = self.input_pos[:, self.curr_pos].contiguous() self.curr_masks = self.masks[:, self.curr_pos, None, :].contiguous() diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 27b8c6cb4..79645d714 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -146,18 +146,27 @@ def infer_wrapper(): print(f"self.past_tokens: {self.past_tokens}") print(f"hidden_state: {hidden_state}") - curr_inference_state = {"past_tokens": self.past_tokens.numpy(force=True).tolist()} + curr_inference_state = { + "past_tokens": self.past_tokens.numpy(force=True).tolist(), + } if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate( tokens=self.past_tokens, hidden_state=hidden_state, + inference_state=curr_inference_state, ) else: if not os.environ.get("TORCH_USE_CACHE", True): - model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens) + model_hs, model_logits = self.sharded_model.generate( + tokens=self.past_tokens, + inference_state=curr_inference_state, + ) else: - model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor) + model_hs, 
model_logits = self.sharded_model.generate( + tokens=input_tensor, + inference_state=curr_inference_state, + ) if model_hs is not None: return ( From bc7d699716c52355cec5374d7f9c4b4481333eac Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 15:08:11 -0900 Subject: [PATCH 536/589] mask and input_pos passing between nodes fix --- exo/inference/torch/models/llama3.py | 12 ++++++++---- exo/inference/torch/sharded_inference_engine.py | 12 +++++++++--- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index cd722864a..f0775f8bd 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -349,7 +349,7 @@ def generate( tokens: Optional[torch.Tensor] = None, hidden_state: Optional[torch.Tensor] = None, inference_state: Optional[dict] = None, - ) -> Tuple[Optional[torch.Tensor], torch.Tensor]: + ) -> Tuple[Optional[torch.Tensor], torch.Tensor, Optional[dict]]: """ Generate logits and/or hidden_states from llama model @@ -369,6 +369,7 @@ def generate( bsz, tokens_length = tokens.size() if tokens_length > 1: + tokens = tokens.view(1, -1).to(device=self.device) if tokens.ndim == 1 else tokens self.curr_pos = tokens_length @@ -427,9 +428,9 @@ def generate( self.curr_input_pos = self.input_pos[:, :tokens_length].squeeze() else: if inference_state is not None: - if self.input_pos is None: + if inference_state.get("input_pos") is not None: self.input_pos = torch.tensor(inference_state["input_pos"]).to(self.device) - if self.masks is None: + if inference_state.get("masks") is not None: self.masks = torch.tensor(inference_state["masks"]).to(self.device) if self.model.caches_are_enabled(): @@ -465,4 +466,7 @@ def generate( else: model_hs = model_output - return model_hs, model_logits + if inference_state is not None: + return model_hs, model_logits, inference_state + else: + return model_hs, model_logits, {} diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 79645d714..6921b4f2d 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -150,20 +150,26 @@ def infer_wrapper(): "past_tokens": self.past_tokens.numpy(force=True).tolist(), } + if inference_state.get("masks") is not None: + curr_inference_state["masks"] = inference_state["masks"] + + if inference_state.get("input_pos") is not None: + curr_inference_state["input_pos"] = inference_state["input_pos"] + if hidden_state is not None: - model_hs, model_logits = self.sharded_model.generate( + model_hs, model_logits, curr_inference_state = self.sharded_model.generate( tokens=self.past_tokens, hidden_state=hidden_state, inference_state=curr_inference_state, ) else: if not os.environ.get("TORCH_USE_CACHE", True): - model_hs, model_logits = self.sharded_model.generate( + model_hs, model_logits, curr_inference_state = self.sharded_model.generate( tokens=self.past_tokens, inference_state=curr_inference_state, ) else: - model_hs, model_logits = self.sharded_model.generate( + model_hs, model_logits, curr_inference_state = self.sharded_model.generate( tokens=input_tensor, inference_state=curr_inference_state, ) From c0d9f57b03b07765f5a8a0b7d3eadd1cb98e5883 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 15:14:13 -0900 Subject: [PATCH 537/589] mask and input pos issue when passing to sharded node fix --- exo/inference/torch/models/llama3.py | 8 ++++++++ 1 file changed, 8 insertions(+) 
diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index f0775f8bd..45583620f 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -430,9 +430,17 @@ def generate( if inference_state is not None: if inference_state.get("input_pos") is not None: self.input_pos = torch.tensor(inference_state["input_pos"]).to(self.device) + + if DEBUG >= 8: + print("using input_pos from inference state") + print(f"{self.input_pos}") if inference_state.get("masks") is not None: self.masks = torch.tensor(inference_state["masks"]).to(self.device) + if DEBUG >= 8: + print("using masks from inference state") + print(f"{self.masks}") + if self.model.caches_are_enabled(): self.curr_input_pos = self.input_pos[:, self.curr_pos].contiguous() self.curr_masks = self.masks[:, self.curr_pos, None, :].contiguous() From c0d0c71cde2be2ba80527f95f0401bb18a1444ae Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 15:19:04 -0900 Subject: [PATCH 538/589] mask and input pos issue when passing to sharded node fix --- exo/inference/torch/models/llama3.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 45583620f..e53d1e906 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -362,6 +362,7 @@ def generate( print(f"tokens: {tokens}") print(f"hidden_state: {hidden_state}") print(f"curr_pos: {self.curr_pos}") + print(f"cached? {self.model.caches_are_enabled()}") model_hs = None model_logits = None From 05d4c9db7087be246a2f21df4fc4df0879652b0e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 15:35:35 -0900 Subject: [PATCH 539/589] mask and input pos issue when passing to sharded node fix --- exo/inference/torch/sharded_inference_engine.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 6921b4f2d..491ab67da 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -163,16 +163,22 @@ def infer_wrapper(): inference_state=curr_inference_state, ) else: - if not os.environ.get("TORCH_USE_CACHE", True): + if not self.sharded_model.model.caches_are_enabled(): model_hs, model_logits, curr_inference_state = self.sharded_model.generate( tokens=self.past_tokens, inference_state=curr_inference_state, ) else: - model_hs, model_logits, curr_inference_state = self.sharded_model.generate( - tokens=input_tensor, - inference_state=curr_inference_state, - ) + if self.past_tokens is not None and self.shard.is_first_layer(): + model_hs, model_logits, curr_inference_state = self.sharded_model.generate( + tokens=self.past_tokens, + inference_state=curr_inference_state, + ) + else: + model_hs, model_logits, curr_inference_state = self.sharded_model.generate( + tokens=input_tensor, + inference_state=curr_inference_state, + ) if model_hs is not None: return ( From 464b9cf7aa8c87ebd5cd916c79fc09e8c82a8233 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 15:39:38 -0900 Subject: [PATCH 540/589] removing mask and input_pos from inference state passing for nodes --- exo/inference/torch/models/llama3.py | 29 ++++--------------- .../torch/sharded_inference_engine.py | 22 ++------------ 2 files changed, 8 insertions(+), 43 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py 
index e53d1e906..744755a7d 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -348,8 +348,10 @@ def generate( self, tokens: Optional[torch.Tensor] = None, hidden_state: Optional[torch.Tensor] = None, - inference_state: Optional[dict] = None, - ) -> Tuple[Optional[torch.Tensor], torch.Tensor, Optional[dict]]: + ) -> Tuple[ + Optional[torch.Tensor], + torch.Tensor, + ]: """ Generate logits and/or hidden_states from llama model @@ -417,10 +419,6 @@ def generate( self.input_pos = torch.arange(0, total_response_length, device=self.device).unsqueeze(0) - if inference_state is not None: - inference_state["masks"] = self.masks.numpy(force=True).tolist() - inference_state["input_pos"] = self.input_pos.numpy(force=True).tolist() - if self.model.caches_are_enabled(): self.curr_masks = self.masks[:, :tokens_length] else: @@ -428,20 +426,6 @@ def generate( self.curr_input_pos = self.input_pos[:, :tokens_length].squeeze() else: - if inference_state is not None: - if inference_state.get("input_pos") is not None: - self.input_pos = torch.tensor(inference_state["input_pos"]).to(self.device) - - if DEBUG >= 8: - print("using input_pos from inference state") - print(f"{self.input_pos}") - if inference_state.get("masks") is not None: - self.masks = torch.tensor(inference_state["masks"]).to(self.device) - - if DEBUG >= 8: - print("using masks from inference state") - print(f"{self.masks}") - if self.model.caches_are_enabled(): self.curr_input_pos = self.input_pos[:, self.curr_pos].contiguous() self.curr_masks = self.masks[:, self.curr_pos, None, :].contiguous() @@ -475,7 +459,4 @@ def generate( else: model_hs = model_output - if inference_state is not None: - return model_hs, model_logits, inference_state - else: - return model_hs, model_logits, {} + return model_hs, model_logits diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 491ab67da..fa5367cdb 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -150,35 +150,19 @@ def infer_wrapper(): "past_tokens": self.past_tokens.numpy(force=True).tolist(), } - if inference_state.get("masks") is not None: - curr_inference_state["masks"] = inference_state["masks"] - - if inference_state.get("input_pos") is not None: - curr_inference_state["input_pos"] = inference_state["input_pos"] - if hidden_state is not None: model_hs, model_logits, curr_inference_state = self.sharded_model.generate( tokens=self.past_tokens, hidden_state=hidden_state, - inference_state=curr_inference_state, ) else: if not self.sharded_model.model.caches_are_enabled(): - model_hs, model_logits, curr_inference_state = self.sharded_model.generate( - tokens=self.past_tokens, - inference_state=curr_inference_state, - ) + model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens,) else: if self.past_tokens is not None and self.shard.is_first_layer(): - model_hs, model_logits, curr_inference_state = self.sharded_model.generate( - tokens=self.past_tokens, - inference_state=curr_inference_state, - ) + model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens,) else: - model_hs, model_logits, curr_inference_state = self.sharded_model.generate( - tokens=input_tensor, - inference_state=curr_inference_state, - ) + model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor,) if model_hs is not None: return ( From d469e3e9b6253f42773636a13510405eb7f745f1 Mon Sep 17 00:00:00 2001 From: 
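The dispatch the engine settles on above boils down to: with KV caches disabled, re-feed the full token history every step; with caches enabled, feed only the newest token, except on the first pass where the whole prompt primes the cache. Sketched in isolation with hypothetical names:

import torch

def pick_generate_input(input_tensor: torch.Tensor,
                        past_tokens: torch.Tensor,
                        caches_enabled: bool,
                        first_pass: bool) -> torch.Tensor:
  if not caches_enabled:
    # No KV cache: the model must see the whole history on every call.
    return past_tokens
  if first_pass:
    # Cache enabled but still empty: prime it with the full prompt.
    return past_tokens
  # Cache already holds the history: only the newest token is needed.
  return input_tensor

prompt = torch.tensor([[10, 11, 12]])
next_tok = torch.tensor([[13]])
history = torch.cat([prompt, next_tok], dim=-1)
print(pick_generate_input(next_tok, history, caches_enabled=True, first_pass=False))  # tensor([[13]])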
risingsunomi Date: Fri, 17 Jan 2025 15:41:11 -0900 Subject: [PATCH 541/589] removing mask and input_pos from inference state passing for nodes --- exo/inference/torch/sharded_inference_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index fa5367cdb..45aa2c57d 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -151,7 +151,7 @@ def infer_wrapper(): } if hidden_state is not None: - model_hs, model_logits, curr_inference_state = self.sharded_model.generate( + model_hs, model_logits = self.sharded_model.generate( tokens=self.past_tokens, hidden_state=hidden_state, ) From 5302b739b00e3e3a644772c03d13165bb8fe9ad5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 19:10:40 -0900 Subject: [PATCH 542/589] testing reloading model at prompt encode to fix OOM issue --- exo/api/chatgpt_api.py | 265 ++++++++---------- exo/inference/torch/models/llama3.py | 6 +- exo/inference/torch/models/llm_utils.py | 2 +- .../torch/sharded_inference_engine.py | 30 +- 4 files changed, 140 insertions(+), 163 deletions(-) diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py index cebedd328..6e9316b31 100644 --- a/exo/api/chatgpt_api.py +++ b/exo/api/chatgpt_api.py @@ -28,6 +28,7 @@ from exo.download.hf.hf_helpers import get_hf_home, get_repo_root from exo.apputil import create_animation_mp4 + class Message: def __init__(self, role: str, content: Union[str, List[Dict[str, Union[str, Dict[str, str]]]]], tools: Optional[List[Dict]] = None): self.role = role @@ -41,7 +42,6 @@ def to_dict(self): return data - class ChatCompletionRequest: def __init__(self, model: str, messages: List[Message], temperature: float, tools: Optional[List[Dict]] = None): self.model = model @@ -76,7 +76,7 @@ def generate_completion( "finish_reason": finish_reason, }], } - + if DEBUG >= 3: print(f"completion: {completion}") @@ -138,7 +138,7 @@ def build_prompt(tokenizer, _messages: List[Message], tools: Optional[List[Dict] chat_template_args = { "conversation": [m.to_dict() for m in messages], "tokenize": False, - "add_generation_prompt": True + "add_generation_prompt": True, } if tools: chat_template_args["tools"] = tools @@ -168,8 +168,17 @@ def __init__(self, request_id: str, timestamp: int, prompt: str): self.timestamp = timestamp self.prompt = prompt + class ChatGPTAPI: - def __init__(self, node: Node, inference_engine_classname: str, response_timeout: int = 90, on_chat_completion_request: Callable[[str, ChatCompletionRequest, str], None] = None, default_model: Optional[str] = None, system_prompt: Optional[str] = None): + def __init__( + self, + node: Node, + inference_engine_classname: str, + response_timeout: int = 90, + on_chat_completion_request: Callable[[str, ChatCompletionRequest, str], None] = None, + default_model: Optional[str] = None, + system_prompt: Optional[str] = None + ): self.node = node self.inference_engine_classname = inference_engine_classname self.response_timeout = response_timeout @@ -205,7 +214,6 @@ def __init__(self, node: Node, inference_engine_classname: str, response_timeout cors.add(self.app.router.add_post("/download", self.handle_post_download), {"*": cors_options}) cors.add(self.app.router.add_get("/topology", self.handle_get_topology), {"*": cors_options}) - if "__compiled__" not in globals(): self.static_dir = Path(__file__).parent.parent/"tinychat" self.app.router.add_get("/", self.handle_root) @@ 
-216,7 +224,7 @@ def __init__(self, node: Node, inference_engine_classname: str, response_timeout self.app.middlewares.append(self.log_request) async def handle_quit(self, request): - if DEBUG>=1: print("Received quit signal") + if DEBUG >= 1: print("Received quit signal") response = web.json_response({"detail": "Quit signal received"}, status=200) await response.prepare(request) await response.write_eof() @@ -246,61 +254,48 @@ async def handle_healthcheck(self, request): async def handle_model_support(self, request): try: - response = web.StreamResponse( - status=200, - reason='OK', - headers={ - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - 'Connection': 'keep-alive', - } - ) - await response.prepare(request) + response = web.StreamResponse(status=200, reason='OK', headers={ + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + }) + await response.prepare(request) - async def process_model(model_name, pretty): - if model_name in model_cards: - model_info = model_cards[model_name] - - if self.inference_engine_classname in model_info.get("repo", {}): - shard = build_base_shard(model_name, self.inference_engine_classname) - if shard: - downloader = HFShardDownloader(quick_check=True) - downloader.current_shard = shard - downloader.current_repo_id = get_repo(shard.model_id, self.inference_engine_classname) - status = await downloader.get_shard_download_status() - - download_percentage = status.get("overall") if status else None - total_size = status.get("total_size") if status else None - total_downloaded = status.get("total_downloaded") if status else False - - model_data = { - model_name: { - "name": pretty, - "downloaded": download_percentage == 100 if download_percentage is not None else False, - "download_percentage": download_percentage, - "total_size": total_size, - "total_downloaded": total_downloaded - } - } - - await response.write(f"data: {json.dumps(model_data)}\n\n".encode()) - - # Process all models in parallel - await asyncio.gather(*[ - process_model(model_name, pretty) - for model_name, pretty in pretty_name.items() - ]) - - await response.write(b"data: [DONE]\n\n") - return response + async def process_model(model_name, pretty): + if model_name in model_cards: + model_info = model_cards[model_name] + + if self.inference_engine_classname in model_info.get("repo", {}): + shard = build_base_shard(model_name, self.inference_engine_classname) + if shard: + downloader = HFShardDownloader(quick_check=True) + downloader.current_shard = shard + downloader.current_repo_id = get_repo(shard.model_id, self.inference_engine_classname) + status = await downloader.get_shard_download_status() + + download_percentage = status.get("overall") if status else None + total_size = status.get("total_size") if status else None + total_downloaded = status.get("total_downloaded") if status else False + + model_data = { + model_name: { + "name": pretty, "downloaded": download_percentage == 100 if download_percentage is not None else False, "download_percentage": download_percentage, "total_size": total_size, + "total_downloaded": total_downloaded + } + } + + await response.write(f"data: {json.dumps(model_data)}\n\n".encode()) + + # Process all models in parallel + await asyncio.gather(*[process_model(model_name, pretty) for model_name, pretty in pretty_name.items()]) + + await response.write(b"data: [DONE]\n\n") + return response except Exception as e: - print(f"Error in handle_model_support: {str(e)}") - traceback.print_exc() - 
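handle_model_support above streams one Server-Sent Event per model as its download status resolves, then terminates the stream with a [DONE] marker. A stripped-down aiohttp sketch of that streaming shape; the endpoint name and the trimmed payload are made up for illustration:

import asyncio
import json
from aiohttp import web

async def handle_status_stream(request: web.Request) -> web.StreamResponse:
  response = web.StreamResponse(status=200, reason="OK", headers={
    "Content-Type": "text/event-stream",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
  })
  await response.prepare(request)

  async def emit(name: str):
    # The real handler queries the shard downloader; here we fake a status payload.
    payload = {name: {"downloaded": False, "download_percentage": 0}}
    await response.write(f"data: {json.dumps(payload)}\n\n".encode())

  # Fan out all status checks concurrently, then close the event stream.
  await asyncio.gather(*[emit(name) for name in ("llama-3.2-1b", "llama-3.1-8b")])
  await response.write(b"data: [DONE]\n\n")
  return response

app = web.Application()
app.router.add_get("/status", handle_status_stream)
# web.run_app(app, port=8080)  # uncomment to serve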
return web.json_response( - {"detail": f"Server error: {str(e)}"}, - status=500 - ) + print(f"Error in handle_model_support: {str(e)}") + traceback.print_exc() + return web.json_response({"detail": f"Server error: {str(e)}"}, status=500) async def handle_get_models(self, request): models_list = [{"id": model_name, "object": "model", "owned_by": "exo", "ready": True} for model_name, _ in model_cards.items()] @@ -469,7 +464,6 @@ def on_result(_request_id: str, tokens: List[int], is_finished: bool): deregistered_callback = self.node.on_token.deregister(callback_id) if DEBUG >= 2: print(f"Deregister {callback_id=} {deregistered_callback=}") - async def handle_post_image_generations(self, request): data = await request.json() @@ -482,7 +476,7 @@ async def handle_post_image_generations(self, request): shard = build_base_shard(model, self.inference_engine_classname) if DEBUG >= 2: print(f"shard: {shard}") if not shard: - return web.json_response({"error": f"Unsupported model: {model} with inference engine {self.inference_engine_classname}"}, status=400) + return web.json_response({"error": f"Unsupported model: {model} with inference engine {self.inference_engine_classname}"}, status=400) request_id = str(uuid.uuid4()) callback_id = f"chatgpt-api-wait-response-{request_id}" @@ -494,77 +488,73 @@ async def handle_post_image_generations(self, request): img = None await asyncio.wait_for(asyncio.shield(asyncio.create_task(self.node.process_prompt(shard, prompt, request_id=request_id, inference_state={"image": img}))), timeout=self.response_timeout) - - response = web.StreamResponse(status=200, reason='OK', headers={'Content-Type': 'application/octet-stream',"Cache-Control": "no-cache",}) + response = web.StreamResponse(status=200, reason='OK', headers={ + 'Content-Type': 'application/octet-stream', + "Cache-Control": "no-cache", + }) await response.prepare(request) def get_progress_bar(current_step, total_steps, bar_length=50): # Calculate the percentage of completion - percent = float(current_step) / total_steps + percent = float(current_step)/total_steps # Calculate the number of hashes to display - arrow = '-' * int(round(percent * bar_length) - 1) + '>' - spaces = ' ' * (bar_length - len(arrow)) - + arrow = '-'*int(round(percent*bar_length) - 1) + '>' + spaces = ' '*(bar_length - len(arrow)) + # Create the progress bar string progress_bar = f'Progress: [{arrow}{spaces}] {int(percent * 100)}% ({current_step}/{total_steps})' return progress_bar async def stream_image(_request_id: str, result, is_finished: bool): - if isinstance(result, list): - await response.write(json.dumps({'progress': get_progress_bar((result[0]), (result[1]))}).encode('utf-8') + b'\n') - - elif isinstance(result, np.ndarray): - im = Image.fromarray(np.array(result)) - images_folder = get_exo_images_dir() - # Save the image to a file - image_filename = f"{_request_id}.png" - image_path = images_folder / image_filename - im.save(image_path) - image_url = request.app.router['static_images'].url_for(filename=image_filename) - base_url = f"{request.scheme}://{request.host}" - # Construct the full URL correctly - full_image_url = base_url + str(image_url) - - await response.write(json.dumps({'images': [{'url': str(full_image_url), 'content_type': 'image/png'}]}).encode('utf-8') + b'\n') - if is_finished: - await response.write_eof() - + if isinstance(result, list): + await response.write(json.dumps({'progress': get_progress_bar((result[0]), (result[1]))}).encode('utf-8') + b'\n') + + elif isinstance(result, np.ndarray): + im = 
Image.fromarray(np.array(result)) + images_folder = get_exo_images_dir() + # Save the image to a file + image_filename = f"{_request_id}.png" + image_path = images_folder/image_filename + im.save(image_path) + image_url = request.app.router['static_images'].url_for(filename=image_filename) + base_url = f"{request.scheme}://{request.host}" + # Construct the full URL correctly + full_image_url = base_url + str(image_url) + + await response.write(json.dumps({'images': [{'url': str(full_image_url), 'content_type': 'image/png'}]}).encode('utf-8') + b'\n') + if is_finished: + await response.write_eof() stream_task = None + def on_result(_request_id: str, result, is_finished: bool): - nonlocal stream_task - stream_task = asyncio.create_task(stream_image(_request_id, result, is_finished)) - return _request_id == request_id and is_finished + nonlocal stream_task + stream_task = asyncio.create_task(stream_image(_request_id, result, is_finished)) + return _request_id == request_id and is_finished await callback.wait(on_result, timeout=self.response_timeout*10) - + if stream_task: - # Wait for the stream task to complete before returning - await stream_task + # Wait for the stream task to complete before returning + await stream_task return response except Exception as e: - if DEBUG >= 2: traceback.print_exc() - return web.json_response({"detail": f"Error processing prompt (see logs with DEBUG>=2): {str(e)}"}, status=500) - + if DEBUG >= 2: traceback.print_exc() + return web.json_response({"detail": f"Error processing prompt (see logs with DEBUG>=2): {str(e)}"}, status=500) + async def handle_delete_model(self, request): try: model_name = request.match_info.get('model_name') if DEBUG >= 2: print(f"Attempting to delete model: {model_name}") if not model_name or model_name not in model_cards: - return web.json_response( - {"detail": f"Invalid model name: {model_name}"}, - status=400 - ) + return web.json_response({"detail": f"Invalid model name: {model_name}"}, status=400) shard = build_base_shard(model_name, self.inference_engine_classname) if not shard: - return web.json_response( - {"detail": "Could not build shard for model"}, - status=400 - ) + return web.json_response({"detail": "Could not build shard for model"}, status=400) repo_id = get_repo(shard.model_id, self.inference_engine_classname) if DEBUG >= 2: print(f"Repo ID for model: {repo_id}") @@ -579,38 +569,28 @@ async def handle_delete_model(self, request): if DEBUG >= 2: print(f"Found model files at {cache_dir}, deleting...") try: shutil.rmtree(cache_dir) - return web.json_response({ - "status": "success", - "message": f"Model {model_name} deleted successfully", - "path": str(cache_dir) - }) + return web.json_response({"status": "success", "message": f"Model {model_name} deleted successfully", "path": str(cache_dir)}) except Exception as e: - return web.json_response({ - "detail": f"Failed to delete model files: {str(e)}" - }, status=500) + return web.json_response({"detail": f"Failed to delete model files: {str(e)}"}, status=500) else: - return web.json_response({ - "detail": f"Model files not found at {cache_dir}" - }, status=404) + return web.json_response({"detail": f"Model files not found at {cache_dir}"}, status=404) except Exception as e: - print(f"Error in handle_delete_model: {str(e)}") - traceback.print_exc() - return web.json_response({ - "detail": f"Server error: {str(e)}" - }, status=500) + print(f"Error in handle_delete_model: {str(e)}") + traceback.print_exc() + return web.json_response({"detail": f"Server error: {str(e)}"}, 
status=500) async def handle_get_initial_models(self, request): model_data = {} for model_name, pretty in pretty_name.items(): - model_data[model_name] = { - "name": pretty, - "downloaded": None, # Initially unknown - "download_percentage": None, # Change from 0 to null - "total_size": None, - "total_downloaded": None, - "loading": True # Add loading state - } + model_data[model_name] = { + "name": pretty, + "downloaded": None, # Initially unknown + "download_percentage": None, # Change from 0 to null + "total_size": None, + "total_downloaded": None, + "loading": True # Add loading state + } return web.json_response(model_data) async def handle_create_animation(self, request): @@ -636,17 +616,9 @@ async def handle_create_animation(self, request): if DEBUG >= 2: print(f"Animation temp directory: {tmp_dir}, output file: {output_path}, directory exists: {tmp_dir.exists()}, directory permissions: {oct(tmp_dir.stat().st_mode)[-3:]}") # Create the animation - create_animation_mp4( - replacement_image_path, - output_path, - device_name, - prompt_text - ) + create_animation_mp4(replacement_image_path, output_path, device_name, prompt_text) - return web.json_response({ - "status": "success", - "output_path": output_path - }) + return web.json_response({"status": "success", "output_path": output_path}) except Exception as e: if DEBUG >= 2: traceback.print_exc() @@ -662,10 +634,7 @@ async def handle_post_download(self, request): if not shard: return web.json_response({"error": f"Could not build shard for model {model_name}"}, status=400) asyncio.create_task(self.node.inference_engine.shard_downloader.ensure_shard(shard, self.inference_engine_classname)) - return web.json_response({ - "status": "success", - "message": f"Download started for model: {model_name}" - }) + return web.json_response({"status": "success", "message": f"Download started for model: {model_name}"}) except Exception as e: if DEBUG >= 2: traceback.print_exc() return web.json_response({"error": str(e)}, status=500) @@ -679,10 +648,7 @@ async def handle_get_topology(self, request): return web.json_response({}) except Exception as e: if DEBUG >= 2: traceback.print_exc() - return web.json_response( - {"detail": f"Error getting topology: {str(e)}"}, - status=500 - ) + return web.json_response({"detail": f"Error getting topology: {str(e)}"}, status=500) async def run(self, host: str = "0.0.0.0", port: int = 52415): runner = web.AppRunner(self.app) @@ -693,15 +659,14 @@ async def run(self, host: str = "0.0.0.0", port: int = 52415): def base64_decode(self, base64_string): #decode and reshape image if base64_string.startswith('data:image'): - base64_string = base64_string.split(',')[1] + base64_string = base64_string.split(',')[1] image_data = base64.b64decode(base64_string) img = Image.open(BytesIO(image_data)) - W, H = (dim - dim % 64 for dim in (img.width, img.height)) + W, H = (dim - dim%64 for dim in (img.width, img.height)) if W != img.width or H != img.height: - if DEBUG >= 2: print(f"Warning: image shape is not divisible by 64, downsampling to {W}x{H}") - img = img.resize((W, H), Image.NEAREST) # use desired downsampling filter + if DEBUG >= 2: print(f"Warning: image shape is not divisible by 64, downsampling to {W}x{H}") + img = img.resize((W, H), Image.NEAREST) # use desired downsampling filter img = mx.array(np.array(img)) - img = (img[:, :, :3].astype(mx.float32) / 255) * 2 - 1 + img = (img[:, :, :3].astype(mx.float32)/255)*2 - 1 img = img[None] return img - diff --git a/exo/inference/torch/models/llama3.py 
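The base64_decode helper above rounds the decoded image down to dimensions divisible by 64 before normalizing it for the image model. That resizing step on its own, using PIL, as a sketch of the same idea rather than exo's exact helper:

from PIL import Image

def snap_to_multiple_of_64(img: Image.Image) -> Image.Image:
  # Stable-diffusion-style models expect height/width divisible by 64,
  # so round both dimensions down and resample only if anything changed.
  w, h = (dim - dim % 64 for dim in (img.width, img.height))
  if (w, h) != (img.width, img.height):
    img = img.resize((w, h), Image.NEAREST)
  return img

print(snap_to_multiple_of_64(Image.new("RGB", (700, 530))).size)  # (640, 512)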
b/exo/inference/torch/models/llama3.py index 744755a7d..38a95ba96 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -348,6 +348,7 @@ def generate( self, tokens: Optional[torch.Tensor] = None, hidden_state: Optional[torch.Tensor] = None, + past_tokens: Optional[torch.Tensor] = None, ) -> Tuple[ Optional[torch.Tensor], torch.Tensor, @@ -371,7 +372,10 @@ def generate( bsz, tokens_length = tokens.size() - if tokens_length > 1: + if tokens_length > 1 or (self.input_pos is None and self.masks is None): + + if tokens_length == 1: + tokens = past_tokens tokens = tokens.view(1, -1).to(device=self.device) if tokens.ndim == 1 else tokens diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index efd5471f0..c498a3b06 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -51,7 +51,7 @@ def load_model_config(model_config_path: Path) -> dict: "hidden_act": base_config.get("hidden_act", "silu") } - if (os.environ.get("TORCH_USE_ORG_SEQ", True) and model_config.get("rope_scaling", None) is not None): + if (os.environ.get("TORCH_USE_ORG_SEQ", False) and model_config.get("rope_scaling", None) is not None): model_config["max_seq_len"] = model_config["rope_scaling"]["original_max_position_embeddings"] return model_config diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 45aa2c57d..d3704bd2d 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -31,8 +31,8 @@ # from torchtune generate recipe # https://github.com/pytorch/torchtune/blob/main/recipes/configs/generation.yaml#L40 -TEMP = 0.6 -TOP_K = 300 +TEMP = 0.0 +TOP_K = 35 class TorchDynamicShardInferenceEngine(InferenceEngine): @@ -42,6 +42,7 @@ class TorchDynamicShardInferenceEngine(InferenceEngine): def __init__(self, shard_downloader: HFShardDownloader): self.shard = None self.shard_downloader = shard_downloader + self.sharded_model = None self.request_id = None self.executor = ThreadPoolExecutor(max_workers=1) self.past_tokens = None @@ -65,6 +66,11 @@ async def encode(self, shard: Shard, prompt: str) -> np.ndarray: print(f"shard: {shard}") print(f"prompt: {prompt}") + if self.sharded_model is not None: + self.sharded_model.model.reset_caches() + self.sharded_model = None + self.shard = None + await self.ensure_shard(shard) tokens = await asyncio.get_event_loop().run_in_executor( @@ -104,11 +110,11 @@ def sample_wrapper(): if DEBUG >= 4: print(f"tokens: {tokens}") - if tokens.item() == self.tokenizer.eos_token_id: - if self.device == torch.device("cuda"): - torch.cuda.empty_cache() - self.sharded_model = None - self.shard = None + # if tokens.item() == self.tokenizer.eos_token_id: + # if self.device == torch.device("cuda"): + # torch.cuda.empty_cache() + # self.sharded_model = None + # self.shard = None return tokens.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(sample_wrapper)) @@ -158,11 +164,13 @@ def infer_wrapper(): else: if not self.sharded_model.model.caches_are_enabled(): model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens,) + elif (self.sharded_model.input_pos is None and self.sharded_model.masks is None): + model_hs, model_logits = self.sharded_model.generate( + tokens=input_tensor, + past_tokens=self.past_tokens, + ) else: - if self.past_tokens is not None and self.shard.is_first_layer(): - 
model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens,) - else: - model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor,) + model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor) if model_hs is not None: return ( From 13ea82deaa333b5b4398834ad1e800e456be192a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 17 Jan 2025 20:07:26 -0900 Subject: [PATCH 543/589] fixing output weight for llama3.1 --- exo/inference/torch/models/llama3.py | 1 + exo/inference/torch/models/llm_utils.py | 2 +- exo/models.py | 199 +++++++++++++++++------- 3 files changed, 142 insertions(+), 60 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 38a95ba96..72ff5ccfb 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -290,6 +290,7 @@ def LlamaModel(config: dict, shard: Shard): tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) if len(re.findall(r"3\.2", shard.model_id)) > 0: + print("Using TiedLinear") output_proj = ttm.TiedLinear(tok_embeddings) else: output_proj = nn.Linear(config["embed_dim"], config["vocab_size"], bias=False) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index c498a3b06..cbd463f73 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -159,7 +159,7 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): remapped_state_dict["model.norm.scale"] = value if key == "lm_head.weight": - remapped_state_dict["output.weight"] = value + remapped_state_dict["model.output.weight"] = value else: print(f"{shard.model_id} not supported for sharding, loading weights normally") diff --git a/exo/models.py b/exo/models.py index 3b47972ff..5f93a67e8 100644 --- a/exo/models.py +++ b/exo/models.py @@ -6,15 +6,14 @@ "llama-3.3-70b": { "layers": 80, "repo": { - "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.3-70B-Instruct-4bit", - "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.3-70B-Instruct", + "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.3-70B-Instruct-4bit", + "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.3-70B-Instruct", }, }, "llama-3.2-1b": { "layers": 16, "repo": { - "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-1B-Instruct-4bit", - "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct", + "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-1B-Instruct-4bit", "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct", "TorchDynamicShardInferenceEngine": "unsloth/Llama-3.2-1B-Instruct" }, }, @@ -28,97 +27,182 @@ "llama-3.2-3b": { "layers": 28, "repo": { - "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-3B-Instruct-4bit", - "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", + "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-3B-Instruct-4bit", + "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", }, }, "llama-3.2-3b-8bit": { "layers": 28, "repo": { - "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-3B-Instruct-8bit", - "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", + "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-3B-Instruct-8bit", + "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", }, }, "llama-3.2-3b-bf16": { "layers": 28, "repo": { - "MLXDynamicShardInferenceEngine": 
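The llm_utils change above redirects lm_head.weight to the torchtune-style model.output.weight key, while Llama 3.2 checkpoints, which tie the output projection to the embedding, get a TiedLinear and need no separate output weight. A hedged sketch of that kind of key remapping; the mapping table here is illustrative, not the full one exo uses:

from typing import Dict
import torch

def remap_hf_keys(hf_state: Dict[str, torch.Tensor], tie_word_embeddings: bool) -> Dict[str, torch.Tensor]:
  remapped = {}
  for key, value in hf_state.items():
    if key == "model.embed_tokens.weight":
      remapped["model.tok_embeddings.weight"] = value
    elif key == "model.norm.weight":
      remapped["model.norm.scale"] = value
    elif key == "lm_head.weight":
      if not tie_word_embeddings:
        # Untied checkpoints (e.g. Llama 3.1) carry a separate output projection.
        remapped["model.output.weight"] = value
      # Tied checkpoints (e.g. Llama 3.2) reuse the embedding weight, so the key is dropped.
    else:
      # Pass every other layer weight through unchanged in this sketch.
      remapped[key] = value
  return remapped

fake = {"model.embed_tokens.weight": torch.zeros(8, 4), "lm_head.weight": torch.zeros(8, 4)}
print(sorted(remap_hf_keys(fake, tie_word_embeddings=False)))
# ['model.output.weight', 'model.tok_embeddings.weight']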
"mlx-community/Llama-3.2-3B-Instruct", - "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", + "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.2-3B-Instruct", + "TinygradDynamicShardInferenceEngine": "unsloth/Llama-3.2-3B-Instruct", }, }, "llama-3.1-8b": { "layers": 32, "repo": { - "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", - "TinygradDynamicShardInferenceEngine": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", + "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", + "TinygradDynamicShardInferenceEngine": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", + "TorchDynamicShardInferenceEngine": "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", }, }, "llama-3.1-70b": { "layers": 80, "repo": { - "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", - "TinygradDynamicShardInferenceEngine": "NousResearch/Meta-Llama-3.1-70B-Instruct", + "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit", + "TinygradDynamicShardInferenceEngine": "NousResearch/Meta-Llama-3.1-70B-Instruct", }, }, "llama-3.1-70b-bf16": { "layers": 80, "repo": { - "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-70B-Instruct-bf16-CORRECTED", - "TinygradDynamicShardInferenceEngine": "NousResearch/Meta-Llama-3.1-70B-Instruct", + "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-70B-Instruct-bf16-CORRECTED", + "TinygradDynamicShardInferenceEngine": "NousResearch/Meta-Llama-3.1-70B-Instruct", }, }, "llama-3-8b": { "layers": 32, "repo": { - "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3-8B-Instruct-4bit", - "TinygradDynamicShardInferenceEngine": "TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", + "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3-8B-Instruct-4bit", + "TinygradDynamicShardInferenceEngine": "TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R", }, }, "llama-3-70b": { "layers": 80, "repo": { - "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3-70B-Instruct-4bit", - "TinygradDynamicShardInferenceEngine": "TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", + "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3-70B-Instruct-4bit", + "TinygradDynamicShardInferenceEngine": "TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-70B-R", }, }, - "llama-3.1-405b": { "layers": 126, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-405B-4bit", }, }, - "llama-3.1-405b-8bit": { "layers": 126, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-405B-Instruct-8bit", }, }, + "llama-3.1-405b": { + "layers": 126, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-405B-4bit",}, + }, + "llama-3.1-405b-8bit": { + "layers": 126, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Meta-Llama-3.1-405B-Instruct-8bit",}, + }, ### mistral - "mistral-nemo": { "layers": 40, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Mistral-Nemo-Instruct-2407-4bit", }, }, - "mistral-large": { "layers": 88, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Mistral-Large-Instruct-2407-4bit", }, }, + "mistral-nemo": { + "layers": 40, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Mistral-Nemo-Instruct-2407-4bit",}, + }, + "mistral-large": { + "layers": 88, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Mistral-Large-Instruct-2407-4bit",}, + }, ### deepseek - "deepseek-coder-v2-lite": { "layers": 27, "repo": { 
"MLXDynamicShardInferenceEngine": "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx", }, }, - "deepseek-coder-v2.5": { "layers": 60, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/DeepSeek-V2.5-MLX-AQ4_1_64", }, }, + "deepseek-coder-v2-lite": { + "layers": 27, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx",}, + }, + "deepseek-coder-v2.5": { + "layers": 60, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/DeepSeek-V2.5-MLX-AQ4_1_64",}, + }, ### llava - "llava-1.5-7b-hf": { "layers": 32, "repo": { "MLXDynamicShardInferenceEngine": "llava-hf/llava-1.5-7b-hf", }, }, + "llava-1.5-7b-hf": { + "layers": 32, + "repo": {"MLXDynamicShardInferenceEngine": "llava-hf/llava-1.5-7b-hf",}, + }, ### qwen - "qwen-2.5-0.5b": { "layers": 28, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-0.5B-Instruct-4bit", }, }, - "qwen-2.5-1.5b": { "layers": 28, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-1.5B-Instruct-4bit", }, }, - "qwen-2.5-coder-1.5b": { "layers": 28, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit", }, }, - "qwen-2.5-3b": { "layers": 36, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-3B-Instruct-4bit", }, }, - "qwen-2.5-coder-3b": { "layers": 36, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-3B-Instruct-4bit", }, }, - "qwen-2.5-7b": { "layers": 28, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-7B-Instruct-4bit", }, }, - "qwen-2.5-coder-7b": { "layers": 28, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-7B-Instruct-4bit", }, }, - "qwen-2.5-math-7b": { "layers": 28, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-7B-Instruct-4bit", }, }, - "qwen-2.5-14b": { "layers": 48, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-14B-Instruct-4bit", }, }, - "qwen-2.5-coder-14b": { "layers": 48, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-14B-Instruct-4bit", }, }, - "qwen-2.5-32b": { "layers": 64, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-32B-Instruct-4bit", }, }, - "qwen-2.5-coder-32b": { "layers": 64, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", }, }, - "qwen-2.5-72b": { "layers": 80, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-72B-Instruct-4bit", }, }, - "qwen-2.5-math-72b": { "layers": 80, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-72B-Instruct-4bit", }, }, + "qwen-2.5-0.5b": { + "layers": 28, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-0.5B-Instruct-4bit",}, + }, + "qwen-2.5-1.5b": { + "layers": 28, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-1.5B-Instruct-4bit",}, + }, + "qwen-2.5-coder-1.5b": { + "layers": 28, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit",}, + }, + "qwen-2.5-3b": { + "layers": 36, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-3B-Instruct-4bit",}, + }, + "qwen-2.5-coder-3b": { + "layers": 36, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-3B-Instruct-4bit",}, + }, + "qwen-2.5-7b": { + "layers": 28, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-7B-Instruct-4bit",}, + }, + "qwen-2.5-coder-7b": { + "layers": 28, + "repo": {"MLXDynamicShardInferenceEngine": 
"mlx-community/Qwen2.5-Coder-7B-Instruct-4bit",}, + }, + "qwen-2.5-math-7b": { + "layers": 28, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-7B-Instruct-4bit",}, + }, + "qwen-2.5-14b": { + "layers": 48, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-14B-Instruct-4bit",}, + }, + "qwen-2.5-coder-14b": { + "layers": 48, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-14B-Instruct-4bit",}, + }, + "qwen-2.5-32b": { + "layers": 64, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-32B-Instruct-4bit",}, + }, + "qwen-2.5-coder-32b": { + "layers": 64, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit",}, + }, + "qwen-2.5-72b": { + "layers": 80, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-72B-Instruct-4bit",}, + }, + "qwen-2.5-math-72b": { + "layers": 80, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Qwen2.5-Math-72B-Instruct-4bit",}, + }, ### nemotron - "nemotron-70b": { "layers": 80, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/nvidia_Llama-3.1-Nemotron-70B-Instruct-HF_4bit", }, }, - "nemotron-70b-bf16": { "layers": 80, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.1-Nemotron-70B-Instruct-HF-bf16", }, }, + "nemotron-70b": { + "layers": 80, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/nvidia_Llama-3.1-Nemotron-70B-Instruct-HF_4bit",}, + }, + "nemotron-70b-bf16": { + "layers": 80, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Llama-3.1-Nemotron-70B-Instruct-HF-bf16",}, + }, # gemma - "gemma2-9b": { "layers": 42, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/gemma-2-9b-it-4bit", }, }, - "gemma2-27b": { "layers": 46, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/gemma-2-27b-it-4bit", }, }, + "gemma2-9b": { + "layers": 42, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/gemma-2-9b-it-4bit",}, + }, + "gemma2-27b": { + "layers": 46, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/gemma-2-27b-it-4bit",}, + }, # stable diffusion - "stable-diffusion-2-1-base": { "layers": 31, "repo": { "MLXDynamicShardInferenceEngine": "stabilityai/stable-diffusion-2-1-base" } }, + "stable-diffusion-2-1-base": {"layers": 31, "repo": {"MLXDynamicShardInferenceEngine": "stabilityai/stable-diffusion-2-1-base"}}, # phi - "phi-3.5-mini": { "layers": 32, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/Phi-3.5-mini-instruct-4bit", }, }, - "phi-4": { "layers": 40, "repo": { "MLXDynamicShardInferenceEngine": "mlx-community/phi-4-4bit", }, }, + "phi-3.5-mini": { + "layers": 32, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/Phi-3.5-mini-instruct-4bit",}, + }, + "phi-4": { + "layers": 40, + "repo": {"MLXDynamicShardInferenceEngine": "mlx-community/phi-4-4bit",}, + }, # dummy - "dummy": { "layers": 8, "repo": { "DummyInferenceEngine": "dummy", }, }, + "dummy": { + "layers": 8, + "repo": {"DummyInferenceEngine": "dummy",}, + }, } pretty_name = { @@ -162,9 +246,11 @@ "stable-diffusion-2-1-base": "Stable Diffusion 2.1", } + def get_repo(model_id: str, inference_engine_classname: str) -> Optional[str]: return model_cards.get(model_id, {}).get("repo", {}).get(inference_engine_classname, None) + def build_base_shard(model_id: str, inference_engine_classname: str) -> Optional[Shard]: repo = get_repo(model_id, inference_engine_classname) n_layers = model_cards.get(model_id, {}).get("layers", 0) @@ -172,24 +258,19 @@ def 
build_base_shard(model_id: str, inference_engine_classname: str) -> Optional return None return Shard(model_id, 0, 0, n_layers) + def get_supported_models(supported_inference_engine_lists: List[List[str]]) -> List[str]: if not supported_inference_engine_lists: return list(model_cards.keys()) from exo.inference.inference_engine import inference_engine_classes - supported_inference_engine_lists = [ - [inference_engine_classes[engine] if engine in inference_engine_classes else engine for engine in engine_list] - for engine_list in supported_inference_engine_lists - ] + supported_inference_engine_lists = [[inference_engine_classes[engine] if engine in inference_engine_classes else engine for engine in engine_list] + for engine_list in supported_inference_engine_lists] def has_any_engine(model_info: dict, engine_list: List[str]) -> bool: return any(engine in model_info.get("repo", {}) for engine in engine_list) def supports_all_engine_lists(model_info: dict) -> bool: - return all(has_any_engine(model_info, engine_list) - for engine_list in supported_inference_engine_lists) + return all(has_any_engine(model_info, engine_list) for engine_list in supported_inference_engine_lists) - return [ - model_id for model_id, model_info in model_cards.items() - if supports_all_engine_lists(model_info) - ] + return [model_id for model_id, model_info in model_cards.items() if supports_all_engine_lists(model_info)] From 5e31e3d51a95e308c77c162cf162154d12273c17 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sat, 18 Jan 2025 15:54:48 -0900 Subject: [PATCH 544/589] changing top_k to 25, making reset of cache and model at every initial prompt encoding --- exo/inference/torch/sharded_inference_engine.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index d3704bd2d..d4e11fe25 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -32,7 +32,7 @@ # from torchtune generate recipe # https://github.com/pytorch/torchtune/blob/main/recipes/configs/generation.yaml#L40 TEMP = 0.0 -TOP_K = 35 +TOP_K = 25 class TorchDynamicShardInferenceEngine(InferenceEngine): @@ -67,9 +67,12 @@ async def encode(self, shard: Shard, prompt: str) -> np.ndarray: print(f"prompt: {prompt}") if self.sharded_model is not None: - self.sharded_model.model.reset_caches() + print("CLEARING SHARD AND MODEL") + if self.device == torch.device("cuda"): + torch.cuda.empty_cache() self.sharded_model = None self.shard = None + self.past_tokens = None await self.ensure_shard(shard) @@ -110,11 +113,6 @@ def sample_wrapper(): if DEBUG >= 4: print(f"tokens: {tokens}") - # if tokens.item() == self.tokenizer.eos_token_id: - # if self.device == torch.device("cuda"): - # torch.cuda.empty_cache() - # self.sharded_model = None - # self.shard = None return tokens.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(sample_wrapper)) From 646c14a76c6116496ab14edaaa8c29c3527d8139 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 19 Jan 2025 09:32:24 -0900 Subject: [PATCH 545/589] putting in better clearing model functions, adding in clearing after infer prompt for non-primary nodes to manage OOM, changed top_k for testing, adding sliding window for larger than max tokens --- .../torch/sharded_inference_engine.py | 70 +++++++++++++++---- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git 
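The two commits above attack OOM by dropping the shard's model (and its KV caches) at every fresh prompt and by trimming over-long prompts to the generation window. In isolation that teardown plus truncation looks roughly like this; attribute names mirror the engine fields in the diff and assume a torchtune-style model with reset_caches, so treat it as a sketch rather than the exact implementation:

import torch

def clear_model(engine) -> None:
  # Drop cached keys/values, release the module, and return VRAM to the allocator.
  if engine.sharded_model is not None:
    if engine.sharded_model.model.caches_are_enabled():
      engine.sharded_model.model.reset_caches()
    del engine.sharded_model
    engine.sharded_model = None
  if torch.cuda.is_available():
    torch.cuda.empty_cache()
  engine.shard = None
  engine.past_tokens = None

def clamp_prompt(tokens, max_generated_tokens: int):
  # Keep only the most recent window if the encoded prompt is longer than
  # the space reserved for generation (a crude sliding window).
  if len(tokens) > max_generated_tokens:
    return tokens[-max_generated_tokens:]
  return tokens

print(clamp_prompt(list(range(10)), max_generated_tokens=4))  # [6, 7, 8, 9]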
a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index d4e11fe25..5c2b7c6f7 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -31,8 +31,8 @@ # from torchtune generate recipe # https://github.com/pytorch/torchtune/blob/main/recipes/configs/generation.yaml#L40 -TEMP = 0.0 -TOP_K = 25 +TEMP = 0.6 +TOP_K = 35 class TorchDynamicShardInferenceEngine(InferenceEngine): @@ -60,6 +60,27 @@ def __init__(self, shard_downloader: HFShardDownloader): else: self.device = torch.device("cpu") + self.rng = torch.Generator(device=self.device) + self.rng.manual_seed(1234) + + def clear_model(self): + """ + Clear out model and shard + A way to avoid OOM as more prompts will just + stack in memory. OOM will be hit eventually for longer prompts. + """ + if self.sharded_model.model.caches_are_enabled(): + self.sharded_model.model.reset_caches() + + del self.sharded_model + self.sharded_model = None + + if self.device == torch.device("cuda"): + torch.cuda.empty_cache() + + self.shard = None + self.past_tokens = None + async def encode(self, shard: Shard, prompt: str) -> np.ndarray: if DEBUG >= 4: print("encode called") @@ -67,12 +88,8 @@ async def encode(self, shard: Shard, prompt: str) -> np.ndarray: print(f"prompt: {prompt}") if self.sharded_model is not None: - print("CLEARING SHARD AND MODEL") - if self.device == torch.device("cuda"): - torch.cuda.empty_cache() - self.sharded_model = None - self.shard = None - self.past_tokens = None + print("CLEARING SHARD AND MODEL - ENCODING") + self.clear_model() await self.ensure_shard(shard) @@ -81,6 +98,11 @@ async def encode(self, shard: Shard, prompt: str) -> np.ndarray: functools.partial(self.tokenizer.encode, prompt, return_tensors="np"), ) + # if going past max, just take from max onward + if len(tokens) > self.sharded_model.max_generated_tokens: + max_gen_tokens = self.sharded_model.max_generated_tokens + tokens = tokens[-max_gen_tokens:] + if DEBUG >= 4: print(f"tokens: {tokens}") @@ -105,11 +127,13 @@ async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: print(f"x: {x}") print(f"temp: {temp}") print(f"top_k: {top_k}") - + print(self.device) logits = torch.tensor(x).to(self.device) def sample_wrapper(): - tokens = tt_sample(logits, temperature=temp, top_k=top_k) + + q = torch.empty((logits.size(0), self.sharded_model.model.tok_embeddings.num_embeddings), device=logits.device).exponential_(1, generator=self.rng) + tokens = tt_sample(logits.clone(), temperature=temp, top_k=top_k, q=q.to(self.device)) if DEBUG >= 4: print(f"tokens: {tokens}") @@ -117,7 +141,13 @@ def sample_wrapper(): return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(sample_wrapper)) - async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray, inference_state: Optional[dict] = None) -> tuple[np.ndarray, Optional[dict]]: + async def infer_tensor( + self, + request_id: str, + shard: Shard, + input_data: np.ndarray, + inference_state: Optional[dict] = None + ) -> tuple[np.ndarray, Optional[dict]]: await self.ensure_shard(shard) @@ -150,6 +180,10 @@ def infer_wrapper(): print(f"self.past_tokens: {self.past_tokens}") print(f"hidden_state: {hidden_state}") + model_input_pos = self.sharded_model.input_pos + model_masks = self.sharded_model.masks + model_cache = self.sharded_model.model.caches_are_enabled() + curr_inference_state = { "past_tokens": self.past_tokens.numpy(force=True).tolist(), } @@ -160,9 
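The sampling change above seeds a torch.Generator and pre-draws exponential noise q so that the call into torchtune's sample() is reproducible across runs. An equivalent standalone top-k / temperature sampler using the same exponential-noise argmax trick, written in plain PyTorch so the signature below is not torchtune's:

import torch

def sample_top_k(logits: torch.Tensor, temperature: float, top_k: int, rng: torch.Generator) -> torch.Tensor:
  # logits: [batch, vocab]. Keep only the top_k logits, temperature-scale, softmax.
  vals, idx = torch.topk(logits, top_k, dim=-1)
  probs = torch.softmax(vals / max(temperature, 1e-5), dim=-1)
  # argmax(probs / q) with q ~ Exp(1) draws from the categorical distribution;
  # seeding the generator makes the draw reproducible.
  q = torch.empty_like(probs).exponential_(1, generator=rng)
  choice = torch.argmax(probs / q, dim=-1, keepdim=True)
  return idx.gather(-1, choice)

rng = torch.Generator().manual_seed(1234)
logits = torch.randn(1, 128)
print(sample_top_k(logits, temperature=0.6, top_k=35, rng=rng))  # reproducible token index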
+194,11 @@ def infer_wrapper(): hidden_state=hidden_state, ) else: - if not self.sharded_model.model.caches_are_enabled(): + if not model_cache: model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens,) - elif (self.sharded_model.input_pos is None and self.sharded_model.masks is None): + elif (model_input_pos is None and model_masks is None and model_cache): + # this is for nodes that are just getting the hidden state + # to do caching and positioning correctly model_hs, model_logits = self.sharded_model.generate( tokens=input_tensor, past_tokens=self.past_tokens, @@ -176,6 +212,11 @@ def infer_wrapper(): curr_inference_state, ) + # clearing for non-primary nodes at end of processing + if not self.shard.is_first_layer() and self.shard.is_last_layer(): + print("CLEARING MODEL - INFER TENSOR NODE") + self.clear_model() + return ( model_logits[:, -1].numpy(force=True), curr_inference_state, @@ -189,6 +230,7 @@ async def ensure_shard(self, shard: Shard): print(f"shard: {shard}") print(f"class shard: {self.shard}") print(f"uuid: {self.uuid}") + print(f"use cache? {bool(os.getenv("TORCH_USE_CACHE", "True").lower() == "true")}") # reset model after last layer to fix OOM if self.shard == shard: @@ -215,7 +257,7 @@ async def ensure_shard(self, shard: Shard): config=self.model_config, shard=shard, device=self.device, - use_cache=os.environ.get("TORCH_USE_CACHE", True), + use_cache=bool(os.getenv("TORCH_USE_CACHE", "True").lower() == "true"), ), ) From 25bdfb7d64e7b627eb6e935cb901b497d3366203 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 19 Jan 2025 09:34:48 -0900 Subject: [PATCH 546/589] fixing env variable boolean --- exo/inference/torch/models/llm_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index cbd463f73..15f3471b2 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -51,7 +51,8 @@ def load_model_config(model_config_path: Path) -> dict: "hidden_act": base_config.get("hidden_act", "silu") } - if (os.environ.get("TORCH_USE_ORG_SEQ", False) and model_config.get("rope_scaling", None) is not None): + use_org_seq = bool(os.getenv("TORCH_USE_ORG_SEQ", "False").lower() == "true") + if use_org_seq and model_config.get("rope_scaling", None) is not None): model_config["max_seq_len"] = model_config["rope_scaling"]["original_max_position_embeddings"] return model_config From d0680b6b961110d9cb7dfc8811d24dc0d7716427 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 19 Jan 2025 09:36:02 -0900 Subject: [PATCH 547/589] syntax fix --- exo/inference/torch/models/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 15f3471b2..374304680 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -52,7 +52,7 @@ def load_model_config(model_config_path: Path) -> dict: } use_org_seq = bool(os.getenv("TORCH_USE_ORG_SEQ", "False").lower() == "true") - if use_org_seq and model_config.get("rope_scaling", None) is not None): + if use_org_seq and model_config.get("rope_scaling", None) is not None: model_config["max_seq_len"] = model_config["rope_scaling"]["original_max_position_embeddings"] return model_config From 58a190a365a517e5cdba5df611f0caeb5cbabf34 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 19 Jan 2025 09:42:35 -0900 Subject: [PATCH 548/589] remove log 
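The TORCH_USE_CACHE / TORCH_USE_ORG_SEQ fixes above exist because os.environ.get("FLAG", True) returns a string once the variable is set, and any non-empty string, including "False", is truthy. A small helper makes the intended parse explicit (a sketch, not part of exo):

import os

def env_flag(name: str, default: bool = False) -> bool:
  # Environment values are strings; compare explicitly instead of relying on truthiness.
  return os.getenv(name, str(default)).strip().lower() in ("1", "true", "yes", "on")

os.environ["TORCH_USE_CACHE"] = "False"
print(bool(os.environ.get("TORCH_USE_CACHE", True)))  # True  -- the bug: "False" is truthy
print(env_flag("TORCH_USE_CACHE", default=True))      # False -- the intended reading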
message --- exo/inference/torch/sharded_inference_engine.py | 1 - 1 file changed, 1 deletion(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 5c2b7c6f7..9e080204b 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -230,7 +230,6 @@ async def ensure_shard(self, shard: Shard): print(f"shard: {shard}") print(f"class shard: {self.shard}") print(f"uuid: {self.uuid}") - print(f"use cache? {bool(os.getenv("TORCH_USE_CACHE", "True").lower() == "true")}") # reset model after last layer to fix OOM if self.shard == shard: From d7d55909aa8418aee57621c01905970869f49c43 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Sun, 19 Jan 2025 09:49:37 -0900 Subject: [PATCH 549/589] removing clearing model on non-primary nodes --- exo/inference/torch/sharded_inference_engine.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 9e080204b..de5c0b8e4 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -137,6 +137,11 @@ def sample_wrapper(): if DEBUG >= 4: print(f"tokens: {tokens}") + # clearing for non-primary nodes at end of processing + # if not self.shard.is_first_layer() and self.shard.is_last_layer(): + # print("CLEARING MODEL - INFER TENSOR NODE") + # self.clear_model() + return tokens.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(sample_wrapper)) @@ -212,11 +217,6 @@ def infer_wrapper(): curr_inference_state, ) - # clearing for non-primary nodes at end of processing - if not self.shard.is_first_layer() and self.shard.is_last_layer(): - print("CLEARING MODEL - INFER TENSOR NODE") - self.clear_model() - return ( model_logits[:, -1].numpy(force=True), curr_inference_state, From ddaab5a3cb6734d94101ed2675d4b2cc4b1099b2 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Wed, 22 Jan 2025 16:09:07 -0900 Subject: [PATCH 550/589] changing out tensor loading, separating out initial mask and input_pos creation outside of shardedllamamodel generate, working on rebuilding to place in inference engine, successful prompt after implmenting torchtune hf_to_tune safetensor loading --- exo/inference/torch/models/llama3.py | 160 +++++------------- exo/inference/torch/models/llm_utils.py | 50 +++++- .../torch/sharded_inference_engine.py | 6 +- exo/inference/torch/tests/test_llama3_full.py | 149 ++++++++++++++-- 4 files changed, 233 insertions(+), 132 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 72ff5ccfb..a8f25ad83 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -136,6 +136,7 @@ def forward( mask: Optional[_MaskType] = None, input_pos: Optional[torch.Tensor] = None, hidden_state: Optional[torch.Tensor] = None, + dtype: torch.dtype = torch.float16 ) -> Union[torch.Tensor, List[torch.Tensor]]: # Determine the type of input and shape if DEBUG >= 4: @@ -153,51 +154,39 @@ def forward( else: seq_len = tokens.shape[1] - h = self.tok_embeddings(tokens) # Apply token tok_embeddings - self._validate_inputs( seq_len, mask=mask, input_pos=input_pos, ) + fl_tokens = tokens.clone() + h = self.tok_embeddings(fl_tokens).to(dtype=dtype) # Apply token tok_embeddings + # Initialize a list to capture hidden states if requested # for captured hidden 
states hidden = [] - - for i in range(self.shard.start_layer, self.shard.end_layer + 1): - layer = self.layers[i] - + curr_layers = [self.layers[i] for i in range(self.shard.start_layer, self.shard.end_layer + 1)] + for i, layer in enumerate(curr_layers): if DEBUG >= 8: print(f"\nhidden layer in H[{i}]\n{h}") print(f"\nmask\n{mask}\ninput_pos\n{input_pos}") print(f"\noutput_hidden_states\n{self.output_hidden_states}\n") + if i in self.output_hidden_states: + hidden.append(h) + # Process through each transformer layer # with torch.no_grad(): - if layer.caches_are_enabled(): - self.check_maxed_cache(tokens=h) - try: - h = layer( - h, - mask=mask, - input_pos=input_pos, - ) - except AssertionError: - # assume due to cache - self.reset_caches() - - h = layer( - h, - mask=mask, - input_pos=input_pos, - ) + h = layer( + h, + mask=mask, + input_pos=input_pos, + ) - else: - h = layer(h) - if i in self.output_hidden_states: - hidden.append(h) + # if i in self.output_hidden_states: + # hidden.append(h) if DEBUG >= 8: print(f"\nhidden layer out H[{i}]->H[{i + 1}]\n{h}\n") @@ -224,16 +213,13 @@ def LlamaModel(config: dict, shard: Shard): """ LlamaModel using torchtune """ - # rope scaling config - scale_factor = 32 - if config["rope_scaling"] is not None: - scale_factor = config["rope_scaling"].get("factor", 32) + # rope scaling config rope = Llama3ScaledRoPE( dim=config["head_dim"], max_seq_len=config["max_seq_len"], base=config["rope_base"], - scale_factor=scale_factor, + scale_factor=config["rope_scaling_factor"], ) # hack to align sharded weights with layers @@ -287,10 +273,10 @@ def LlamaModel(config: dict, shard: Shard): layers[i] = layer layers = nn.ModuleList(layers) - tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) if len(re.findall(r"3\.2", shard.model_id)) > 0: print("Using TiedLinear") + tok_embeddings = nn.Embedding(config["vocab_size"], config["embed_dim"]) output_proj = ttm.TiedLinear(tok_embeddings) else: output_proj = nn.Linear(config["embed_dim"], config["vocab_size"], bias=False) @@ -316,6 +302,7 @@ def __init__( config: dict, shard: Shard, device: Optional[torch.device] = None, + dtype: torch.dtype = torch.float16, use_cache: Optional[bool] = False, max_generated_tokens: int = 1024, ): @@ -323,7 +310,7 @@ def __init__( self.shard = shard self.config = config - self.dtype = torch.float16 + self.dtype = dtype self.device = device if device is not None else torch.device("cpu") self.max_seq_len = self.config["max_seq_len"] self.use_cache = use_cache @@ -339,17 +326,16 @@ def __init__( # keep track of current position in generation self.max_generated_tokens = max_generated_tokens - self.curr_pos = 0 - self.masks = None - self.curr_masks = None self.input_pos = None - self.curr_input_pos = None + self.mask = None def generate( self, tokens: Optional[torch.Tensor] = None, + mask: Optional[torch.Tensor] = None, + input_pos: Optional[torch.Tensor] = None, hidden_state: Optional[torch.Tensor] = None, - past_tokens: Optional[torch.Tensor] = None, + curr_pos: Optional[int] = 0 ) -> Tuple[ Optional[torch.Tensor], torch.Tensor, @@ -364,79 +350,28 @@ def generate( if DEBUG >= 4: print("generate called") print(f"tokens: {tokens}") + if mask is not None: + print(f"mask: {mask}") + print(f"input_pos: {input_pos}") + else: + print(f"self.mask: {self.mask}") + print(f"self.input_pos: {self.input_pos}") print(f"hidden_state: {hidden_state}") - print(f"curr_pos: {self.curr_pos}") + print(f"curr_pos: {curr_pos}") print(f"cached? 
{self.model.caches_are_enabled()}") model_hs = None model_logits = None - bsz, tokens_length = tokens.size() - - if tokens_length > 1 or (self.input_pos is None and self.masks is None): - - if tokens_length == 1: - tokens = past_tokens - - tokens = tokens.view(1, -1).to(device=self.device) if tokens.ndim == 1 else tokens - - self.curr_pos = tokens_length - - # using self.max_seq_len will take up alot of VRAM - total_response_length = tokens_length + self.max_generated_tokens - - # setup cache - if not self.model.caches_are_enabled() and self.use_cache: - with self.device: - self.model.setup_caches( - bsz, - self.dtype, - decoder_max_seq_len=tokens.numel() + self.max_generated_tokens, - ) - - if not self.shard.is_last_layer(): - self.model.output_hidden_states = [self.shard.end_layer] - - if not self.model.caches_are_enabled(): - max_seq_len = total_response_length - else: - max_seq_len = self.model.decoder_max_cache_seq_len - - # masking for proper attention - padding_masks = tokens != self.pad_id - if not padding_masks.all(): - padding_masks = torch.nn.functional.pad( - padding_masks, - (0, self.max_generated_tokens), - value=True, - ) - - self.masks = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=max_seq_len) - - self.input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) - else: - self.masks = torch.tril(torch.ones( - total_response_length, - max_seq_len, - dtype=torch.bool, - device=self.device, - )).unsqueeze(0) - - self.input_pos = torch.arange(0, total_response_length, device=self.device).unsqueeze(0) - - if self.model.caches_are_enabled(): - self.curr_masks = self.masks[:, :tokens_length] - else: - self.curr_masks = self.masks[:, :tokens_length, :tokens_length] - - self.curr_input_pos = self.input_pos[:, :tokens_length].squeeze() - else: + self.model.output_hidden_states = [self.shard.end_layer] + + if curr_pos > 0: if self.model.caches_are_enabled(): - self.curr_input_pos = self.input_pos[:, self.curr_pos].contiguous() - self.curr_masks = self.masks[:, self.curr_pos, None, :].contiguous() + input_pos = input_pos[:, curr_pos].contiguous() + mask = mask[:, curr_pos, None, :].contiguous() else: - self.curr_input_pos = self.input_pos[:, :self.curr_pos + 1] - self.curr_masks = self.masks[:, :self.curr_pos + 1, :self.curr_pos + 1] + input_pos = input_pos[:, :curr_pos + 1] + mask = mask[:, :curr_pos + 1, :curr_pos + 1] if DEBUG >= 4: print("model_input") @@ -444,24 +379,23 @@ def generate( print(f"tokens: {tokens}") if hidden_state is not None: print(f"hidden_state: {hidden_state}") - print(f"mask: {self.curr_masks}") - print(f"input_pos: {self.curr_input_pos}") + print(f"self.mask: {self.mask}") + print(f"self.input_pos: {self.input_pos}") model_output = self.model( tokens=tokens, - mask=self.curr_masks, - input_pos=self.curr_input_pos, + mask=mask, + input_pos=input_pos, hidden_state=hidden_state, + dtype=self.dtype ) - self.curr_pos += 1 - - if DEBUG >= 4: - print(f"model_output\n{model_output}") - if self.shard.is_last_layer(): model_logits = model_output else: model_hs = model_output + if DEBUG >= 4: + print(f"model_hs\n{model_hs}\nmodel_logits\n{model_logits}") + return model_hs, model_logits diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 374304680..993eb4b96 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -5,17 +5,26 @@ import re import json from pathlib import Path -from typing import Any +from typing import Any, Dict import torch import 
torch.nn as nn from torchtune.modules import FeedForward +from torchtune.models.convert_weights import hf_to_tune from safetensors.torch import load_file as load_safetensors from exo.helpers import DEBUG from exo.inference.shard import Shard +# dtype string to dtype from huggingface type config.json +HF_PRECISION_STR_TO_DTYPE: Dict[str, torch.dtype] = { + "float16": torch.float16, + "bfloat16": torch.bfloat16, + "float32": torch.float32, + "float64": torch.float64, +} + def load_model_config(model_config_path: Path) -> dict: """ @@ -48,13 +57,22 @@ def load_model_config(model_config_path: Path) -> dict: "vocab_size": base_config["vocab_size"], "num_layers": base_config["num_hidden_layers"], "attn_bias": base_config.get("attention_bias", False), - "hidden_act": base_config.get("hidden_act", "silu") + "hidden_act": base_config.get("hidden_act", "silu"), + "torch_dtype": HF_PRECISION_STR_TO_DTYPE.get( + base_config.get("torch_dtype", "float16"), + torch.float16 + ) } + if model_config.get("rope_scaling", None) is not None: + model_config["rope_scaling_factor"] = model_config["rope_scaling"].get("rope_factor", 32) + use_org_seq = bool(os.getenv("TORCH_USE_ORG_SEQ", "False").lower() == "true") if use_org_seq and model_config.get("rope_scaling", None) is not None: model_config["max_seq_len"] = model_config["rope_scaling"]["original_max_position_embeddings"] + + return model_config @@ -75,6 +93,34 @@ def check_weights(model, state_dict): if name not in model_state_dict: print(f"Unexpected weight {name} found in state_dict") +def load_weights_torch(cache_dir: Path, model: Any, config: Dict): + # Load weights from safetensors files in the cache directory + safetensors_files = list(cache_dir.glob("*.safetensors")) + if not safetensors_files: + raise FileNotFoundError("No safetensors files found in the cache directory.") + + # Load weights from each found safetensors file + full_state_dict = {} + for safetensor_file in safetensors_files: + state_dict = load_safetensors(safetensor_file) + + if full_state_dict is not None: + full_state_dict = full_state_dict | state_dict + else: + full_state_dict = state_dict + + converted_sd = hf_to_tune( + state_dict=full_state_dict, + num_heads=config["num_heads"], + num_kv_heads=config["num_kv_heads"], + dim=config["embed_dim"], + head_dim=config["head_dim"] + ) + + model.load_state_dict(converted_sd, strict=True) + + print("\n--- checking weights ----\n") + check_weights(model, converted_sd) def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): """ diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index de5c0b8e4..edcb757df 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -128,11 +128,13 @@ async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: print(f"temp: {temp}") print(f"top_k: {top_k}") print(self.device) + logits = torch.tensor(x).to(self.device) - def sample_wrapper(): + q = torch.empty((logits.size(0), self.sharded_model.model.tok_embeddings.num_embeddings), device=logits.device).exponential_(1, generator=self.rng) - q = torch.empty((logits.size(0), self.sharded_model.model.tok_embeddings.num_embeddings), device=logits.device).exponential_(1, generator=self.rng) + + def sample_wrapper(): tokens = tt_sample(logits.clone(), temperature=temp, top_k=top_k, q=q.to(self.device)) if DEBUG >= 4: print(f"tokens: {tokens}") diff --git a/exo/inference/torch/tests/test_llama3_full.py 
b/exo/inference/torch/tests/test_llama3_full.py index 703b87861..e3fa4e7f2 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -16,16 +16,20 @@ from exo.inference.torch.models.llama3 import ShardedLlamaModel from exo.inference.shard import Shard -from exo.inference.torch.models.llm_utils import (load_model_config, load_model_weights_torchtune, check_weights) - -# MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct" -MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" -TEMP = 0.0 +from exo.inference.torch.models.llm_utils import ( + load_model_config, + load_weights_torch, +) + +MODEL_NAME = "unsloth/Llama-3.2-1B-Instruct" +# MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" +TEMP = 0.85 TOP_K = 35 -MAX_NEW_TOKENS = 100 +MAX_NEW_TOKENS = 200 +RAND_SEED = 42 -def main(model, prompt: str, device: torch.device = torch.device("cpu")): +def main(model, prompt: str, device: torch.device = torch.device("cpu"), dtype: torch.dtype = torch.bfloat16): messages = [{ "role": "assistant", "content": "", @@ -36,7 +40,8 @@ def main(model, prompt: str, device: torch.device = torch.device("cpu")): text = llama_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) tok_out = llama_tokenizer([text], return_tensors="pt") - tokens = tok_out.input_ids.to(device=device) + print(f"tok_out: {tok_out}") + tokens = tok_out.input_ids.to(device=device, dtype=torch.int) # messages = [] # messages.extend([ @@ -48,35 +53,134 @@ def main(model, prompt: str, device: torch.device = torch.device("cpu")): # tokenizer_out = llama_tokenizer({"messages": messages}, inference=True) # tokens = torch.tensor([tokenizer_out["tokens"]], dtype=torch.int, device=device) + + rng = torch.Generator(device=device) + rng.manual_seed(RAND_SEED) + generated_tokens = tokens.clone() print(f"tokens: {tokens}") - _, logits = model.generate(tokens=tokens) + bsz, tokens_length = tokens.size() + + # using self.max_seq_len will take up alot of VRAM + total_response_length = tokens_length + MAX_NEW_TOKENS + + # setup cache + if not model.model.caches_are_enabled(): + with device: + model.model.setup_caches( + bsz, + dtype, + decoder_max_seq_len=total_response_length + ) + + if not model.model.caches_are_enabled(): + max_seq_len = total_response_length + else: + max_seq_len = model.model.decoder_max_cache_seq_len + + # masking for proper attention + + # select correct pad_id + if hasattr(llama_tokenizer, "pad_id"): + pad_id = llama_tokenizer.pad_id + elif hasattr(llama_tokenizer, "pad_token_id"): + print(f"pad_token_id: {llama_tokenizer.pad_token_id}") + if llama_tokenizer.pad_token_id is not None: + pad_id = llama_tokenizer.pad_token_id + else: + pad_id = 0 + else: + pad_id = 0 + + print(f"pad_id: {pad_id}") + + padding_masks = tokens != pad_id + if not padding_masks.all(): + padding_masks = torch.nn.functional.pad( + padding_masks, + (0, MAX_NEW_TOKENS), + value=True, + ) + + mask = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=max_seq_len) + + input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) + else: + mask = torch.tril(torch.ones( + total_response_length, + max_seq_len, + dtype=torch.bool, + device=device, + )).unsqueeze(0) + + input_pos = torch.arange(0, total_response_length, device=device).unsqueeze(0) + + print(f"init mask: {mask}") + print(f"init input_pos: {input_pos}") + + if model.model.caches_are_enabled(): + curr_mask = mask[:, :tokens_length] + else: + curr_mask = mask[:, :tokens_length, :tokens_length] + + curr_pos = 0 + + _, 
logits = model.generate( + tokens=tokens, + mask=curr_mask, + input_pos=input_pos[:, :tokens_length].squeeze(), + curr_pos=curr_pos + ) + + curr_pos = tokens_length + q = torch.empty(( + logits.size(0), + model.model.tok_embeddings.num_embeddings + ), device=logits.device).exponential_(1, generator=rng) + tokens = ttg.sample( logits=logits[:, -1].clone(), temperature=TEMP, top_k=TOP_K, + q=q ) print(f"tokens: {tokens}") for i in range(MAX_NEW_TOKENS - 1): - print(f"gen #{i}") + print(f"gen #{i+1}") if tokens.item() == llama_tokenizer.eos_token_id: - # if tokens.item() in llama_tokenizer.stop_tokens: + # if tokens.item() in llama_tokenizer.stop_tokens: print("stop token hit!") break - _, logits = model.generate(tokens=tokens) + tokens = tokens.view(1, -1).to(device=device) if tokens.ndim == 1 else tokens + + _, logits = model.generate( + tokens=tokens, + input_pos=input_pos, + mask=mask, + curr_pos=curr_pos + ) + + curr_pos += 1 + + q = torch.empty( + ( + logits.size(0), + model.model.tok_embeddings.num_embeddings + ), device=logits.device).exponential_(1, generator=rng) tokens = ttg.sample( logits=logits[:, -1].clone(), temperature=TEMP, top_k=TOP_K, + q=q, ) print(f"tokens: {tokens}") @@ -104,6 +208,18 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp prompt = torch.tensor(tokenizer_out["tokens"], dtype=torch.int, device=device) print(f"tokens prompt: {prompt}") print(f"pad_id: {llama_tokenizer.pad_id}") + # messages = [{ + # "role": "assistant", + # "content": "", + # }, { + # "role": "user", + # "content": prompt, + # }] + + # text = llama_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) + # tok_out = llama_tokenizer([text], return_tensors="pt") + # prompt_tok = tok_out.input_ids.to(device=device) + # print(f"tokens prompt: {prompt_tok}") generated_tokens, _ = ttg.generate( model=model.model, @@ -123,6 +239,7 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp if __name__ == "__main__": + # prompt = "Hello, how are you?" prompt = "Tell me a joke." # prompt = "What is the meaning of exo?" 
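# Illustrative aside: a minimal, self-contained sketch of the mask / position-id setup and
# q-based sampling that main() above relies on. It uses only the torchtune helpers already
# imported as ttg in this test; the token ids, vocab size, and sampling settings below are
# hypothetical stand-ins, not values taken from this patch.
import torch
import torchtune.generation as ttg

device = torch.device("cpu")
pad_id = 0
max_new_tokens = 8
tokens = torch.tensor([[1, 15, 27, 2]], device=device)  # hypothetical prompt ids
bsz, prompt_len = tokens.size()
total_len = prompt_len + max_new_tokens

padding_masks = tokens != pad_id
if not padding_masks.all():
  # padded prompt: derive the causal mask and position ids from the padding mask
  padding_masks = torch.nn.functional.pad(padding_masks, (0, max_new_tokens), value=True)
  mask = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=total_len)
  input_pos = ttg.get_position_ids_from_padding_mask(padding_masks)
else:
  # no padding: plain lower-triangular mask and sequential positions
  mask = torch.tril(torch.ones(total_len, total_len, dtype=torch.bool, device=device)).unsqueeze(0)
  input_pos = torch.arange(0, total_len, device=device).unsqueeze(0)

# ttg.sample takes a pre-drawn Exponential(1) tensor q, so sampling is reproducible
# for a fixed generator seed.
rng = torch.Generator(device=device)
rng.manual_seed(42)
vocab_size = 128  # hypothetical
logits = torch.randn(bsz, vocab_size, device=device)  # stands in for model logits[:, -1]
q = torch.empty((bsz, vocab_size), device=device).exponential_(1, generator=rng)
next_tok = ttg.sample(logits, temperature=0.85, top_k=35, q=q)
print(next_tok)
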
# prompt = "Tell me a short 4 line haiku" @@ -153,18 +270,20 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp # Initialize LlamaModel with config and tokenizer device = torch.device("cuda") + dtype = torch.bfloat16 # device = torch.device("cpu") shard_model_1 = ShardedLlamaModel( config=config, shard=shard_1, device=device, + dtype=config["torch_dtype"], use_cache=True, max_generated_tokens=MAX_NEW_TOKENS, ) print(f"\nshard_model_1: {shard_model_1}") - load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) - - main(shard_model_1, prompt, device) + # load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) + load_weights_torch(cache_dir, shard_model_1.model, config) + main(shard_model_1, prompt, device, config["torch_dtype"]) # normal_full(shard_model_1, prompt, device) From 216ee1b38735b8e3bfb2cdad6e449c42af6c8d8c Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 01:06:36 -0900 Subject: [PATCH 551/589] still moving and upgrading token generation, broken --- exo/inference/torch/models/llm_utils.py | 2 + .../torch/sharded_inference_engine.py | 180 ++++++++++++++---- 2 files changed, 143 insertions(+), 39 deletions(-) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 993eb4b96..fa8b49392 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -175,6 +175,8 @@ def load_model_weights_torchtune(cache_dir: Path, shard: Shard, model: Any): if re_attn[0][1] == "o_proj": new_key = f"model.layers.{layer_num}.attn.output_proj.weight" remapped_state_dict[new_key] = value + # add in permute for q and k proj + # see https://github.com/pytorch/torchtune/blob/main/torchtune/models/convert_weights.py#L199 else: new_key = f"model.layers.{layer_num}.attn.{re_attn[0][1]}.{re_attn[0][2]}" remapped_state_dict[new_key] = value diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index edcb757df..9ec4b2fc2 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -13,7 +13,7 @@ import numpy as np import torch -from torchtune.generation import sample as tt_sample +import torchtune.generation as ttg from transformers import AutoTokenizer from exo.inference.inference_engine import InferenceEngine @@ -23,7 +23,7 @@ from exo.helpers import DEBUG from exo.inference.torch.models.llm_utils import ( load_model_config, - load_model_weights_torchtune, + load_weights_torch, ) # supported models @@ -34,6 +34,40 @@ TEMP = 0.6 TOP_K = 35 +class InferenceState: + def __init__( + self, + tokens: Optional[torch.tensor], + input_pos: Optional[torch.tensor], + mask: Optional[torch.tensor], + curr_pos: int=0 + ): + self.tokens = tokens + self.input_pos = input_pos + self.mask = mask + self.curr_pos = curr_pos + + def from_dict(self, state_dict): + self.tokens = state_dict.tokens + self.input_pos = state_dict.input_pos + self.mask = state_dict.mask + self.curr_pos = state_dict.curr_pos + + def __dict__(self) -> dict: + return { + "tokens": self.tokens.numpy(force=True).tolist(), + "input_post": self.input_pos.numpy(force=True).tolist(), + "mask": self.mask.numpy(force=True).tolist(), + "curr_pos": self.curr_pos + } + + def __str__(self) -> str: + return f""" + tokens: {self.tokens} + input_pos: {self.input_pos} + mask: {self.mask} + curr_pos: {self.curr_pos} + """ class TorchDynamicShardInferenceEngine(InferenceEngine): """ @@ -45,11 +79,14 @@ def 
__init__(self, shard_downloader: HFShardDownloader): self.sharded_model = None self.request_id = None self.executor = ThreadPoolExecutor(max_workers=1) - self.past_tokens = None + self.uuid = str(uuid.uuid4()) self.model_path = None self.model_config = None + # current inference engine state + self.state = InferenceState() + # device settings if os.environ.get("TORCH_DEVICE"): self.device = torch.device(os.environ["TORCH_DEVICE"]) @@ -93,20 +130,86 @@ async def encode(self, shard: Shard, prompt: str) -> np.ndarray: await self.ensure_shard(shard) - tokens = await asyncio.get_event_loop().run_in_executor( - self.executor, - functools.partial(self.tokenizer.encode, prompt, return_tensors="np"), - ) + # tokens = await asyncio.get_event_loop().run_in_executor( + # self.executor, + # functools.partial(self.tokenizer.encode, prompt, return_tensors="np"), + # ) + + def encode_wrapper() -> np.ndarry: + """ + Encode the tensors from prompt along with the + initial input_pos and mask + """ + tokens = self.tokenizer.encode(prompt, return_tensors="np") + + if DEBUG >= 4: + print("encoded_wrapper called") + print(f"tokens: {tokens}") - # if going past max, just take from max onward - if len(tokens) > self.sharded_model.max_generated_tokens: - max_gen_tokens = self.sharded_model.max_generated_tokens - tokens = tokens[-max_gen_tokens:] + # if going past max, just take from max onward + if len(tokens) > self.sharded_model.max_generated_tokens: + max_gen_tokens = self.sharded_model.max_generated_tokens + tokens = tokens[-max_gen_tokens:] - if DEBUG >= 4: - print(f"tokens: {tokens}") + self.past_tokens = tokens - return tokens + bsz, tklng = tokens.size() + total_response_length = tklng + self.sharded_model.max_generated_tokens + + # setup cache + if not self.sharded_model.model.caches_are_enabled(): + with self.device: + self.sharded_model.model.setup_caches( + bsz, + self.model_config["torch_dtype"], + decoder_max_seq_len=total_response_length + ) + + # setup max sequence length + if not self.sharded_model.model.caches_are_enabled(): + max_seq_len = total_response_length + else: + max_seq_len = self.sharded_model.model.decoder_max_cache_seq_len + + # set pad_id + if hasattr(self.tokenizer, "pad_id"): + pad_id = self.tokenizer.pad_id + elif hasattr(self.tokenizer, "pad_token_id"): + print(f"pad_token_id: {self.tokenizer.pad_token_id}") + if self.tokenizer.pad_token_id is not None: + pad_id = self.tokenizer.pad_token_id + else: + pad_id = 0 + else: + pad_id = 0 + + padding_masks = tokens != pad_id + if not padding_masks.all(): + padding_masks = torch.nn.functional.pad( + padding_masks, + (0, self.sharded_model.max_generated_tokens), + value=True, + ) + + self.state.mask = ttg.get_causal_mask_from_padding_mask(padding_masks, target_seq_len=max_seq_len) + + self.state.input_pos = ttg.get_position_ids_from_padding_mask(padding_masks) + else: + self.state.mask = torch.tril(torch.ones( + total_response_length, + max_seq_len, + dtype=torch.bool, + device=self.device, + )).unsqueeze(0) + + self.state.input_pos = torch.arange(0, total_response_length, device=self.device).unsqueeze(0) + + return tokens + + return await asyncio.get_running_loop().run_in_executor( + self.executor, + functools.partial(encode_wrapper), + ) async def decode(self, shard: Shard, tokens: np.ndarray) -> str: if DEBUG >= 4: @@ -135,7 +238,7 @@ async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: def sample_wrapper(): - tokens = tt_sample(logits.clone(), temperature=temp, top_k=top_k, q=q.to(self.device)) + tokens = 
ttg.sample(logits.clone(), temperature=temp, top_k=top_k, q=q.to(self.device)) if DEBUG >= 4: print(f"tokens: {tokens}") @@ -165,8 +268,9 @@ async def infer_tensor( print(f"input_data: {input_data}") print(f"inference_state: {inference_state}") - if inference_state.get("past_tokens") is not None: - self.past_tokens = torch.tensor(inference_state["past_tokens"]).to(self.device) + if inference_state.get("tokens") is not None: + self.state.from_dict(inference_state) + self.state.tokens = torch.tensor(self.state.tokens).to(self.device) self.request_id = request_id if not self.request_id else self.request_id @@ -176,42 +280,43 @@ async def infer_tensor( elif input_data.ndim == 2: input_tensor = torch.tensor(input_data).to(self.device) - if self.past_tokens is not None: - self.past_tokens = torch.cat([self.past_tokens, input_tensor], dim=-1).to(self.device) + if self.state.tokens is not None: + self.state.tokens = torch.cat([self.state.tokens, input_tensor], dim=-1).to(self.device) else: - self.past_tokens = input_tensor.clone() + self.state.tokens = input_tensor.clone() def infer_wrapper(): if DEBUG >= 4: print("infer_wrapper called") - print(f"self.past_tokens: {self.past_tokens}") + print(f"self.state.tokens: {self.state.tokens}") print(f"hidden_state: {hidden_state}") - model_input_pos = self.sharded_model.input_pos - model_masks = self.sharded_model.masks model_cache = self.sharded_model.model.caches_are_enabled() - curr_inference_state = { - "past_tokens": self.past_tokens.numpy(force=True).tolist(), - } + if self.state.curr_pos == 0: + # initial run + self.state.curr_pos = self.state.tokens.size(-1) if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate( tokens=self.past_tokens, hidden_state=hidden_state, + input_pos=self.state.input_pos, + mask=self.state.mask ) else: if not model_cache: - model_hs, model_logits = self.sharded_model.generate(tokens=self.past_tokens,) - elif (model_input_pos is None and model_masks is None and model_cache): - # this is for nodes that are just getting the hidden state - # to do caching and positioning correctly model_hs, model_logits = self.sharded_model.generate( - tokens=input_tensor, - past_tokens=self.past_tokens, + tokens=self.past_tokens, + input_pos=self.state.input_pos, + mask=self.state.mask ) else: - model_hs, model_logits = self.sharded_model.generate(tokens=input_tensor) + model_hs, model_logits = self.sharded_model.generate( + tokens=input_tensor, + input_pos=self.state.input_pos, + mask=self.state.mask + ) if model_hs is not None: return ( @@ -246,11 +351,7 @@ async def ensure_shard(self, shard: Shard): # self.tokenizer = await _resolve_tokenizer(model_path) self.tokenizer = await _resolve_tokenizer(self.model_path) - eot_token = ( - self.tokenizer.special_tokens_map.get("eos_token_id") - if hasattr(self.tokenizer, "_tokenizer") and isinstance(self.tokenizer._tokenizer, AutoTokenizer) else getattr(self.tokenizer, "eos_token_id", None) - ) - + self.sharded_model = await asyncio.get_running_loop().run_in_executor( self.executor, functools.partial( @@ -258,6 +359,7 @@ async def ensure_shard(self, shard: Shard): config=self.model_config, shard=shard, device=self.device, + dtype=self.model_config["torch_dtype"], use_cache=bool(os.getenv("TORCH_USE_CACHE", "True").lower() == "true"), ), ) @@ -265,7 +367,7 @@ async def ensure_shard(self, shard: Shard): # load sharded weights await asyncio.get_running_loop().run_in_executor( self.executor, - functools.partial(load_model_weights_torchtune, self.model_path, shard, 
self.sharded_model), + functools.partial(load_weights_torch, self.model_path, self.sharded_model, self.model_config), ) async def load_checkpoint(self, shard: Shard, path: str): From 106e56e436bd8d64dd7b49300c4bbf24939b0e05 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 10:46:34 -0900 Subject: [PATCH 552/589] updated inference engine, added InferenceState class, added in tweaks for memory manipulation but still running into OOM --- exo/inference/torch/models/llama3.py | 28 ++-- exo/inference/torch/models/llm_utils.py | 43 +++++- .../torch/sharded_inference_engine.py | 140 ++++++++---------- 3 files changed, 116 insertions(+), 95 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index a8f25ad83..eaf6734b9 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -326,8 +326,6 @@ def __init__( # keep track of current position in generation self.max_generated_tokens = max_generated_tokens - self.input_pos = None - self.mask = None def generate( self, @@ -351,11 +349,8 @@ def generate( print("generate called") print(f"tokens: {tokens}") if mask is not None: - print(f"mask: {mask}") - print(f"input_pos: {input_pos}") - else: - print(f"self.mask: {self.mask}") - print(f"self.input_pos: {self.input_pos}") + print(f"mask: {mask.size()}") + print(f"input_pos: {input_pos.size()}") print(f"hidden_state: {hidden_state}") print(f"curr_pos: {curr_pos}") print(f"cached? {self.model.caches_are_enabled()}") @@ -364,7 +359,7 @@ def generate( model_logits = None self.model.output_hidden_states = [self.shard.end_layer] - + if curr_pos > 0: if self.model.caches_are_enabled(): input_pos = input_pos[:, curr_pos].contiguous() @@ -372,6 +367,15 @@ def generate( else: input_pos = input_pos[:, :curr_pos + 1] mask = mask[:, :curr_pos + 1, :curr_pos + 1] + else: + _, tklng = tokens.size() + + if self.model.caches_are_enabled(): + mask = mask[:, :tklng] + else: + mask = mask[:, :tklng, :tklng] + + input_pos = input_pos[:, :tklng].squeeze() if DEBUG >= 4: print("model_input") @@ -379,13 +383,13 @@ def generate( print(f"tokens: {tokens}") if hidden_state is not None: print(f"hidden_state: {hidden_state}") - print(f"self.mask: {self.mask}") - print(f"self.input_pos: {self.input_pos}") + print(f"mask: {mask}") + print(f"input_pos: {input_pos}") model_output = self.model( tokens=tokens, - mask=mask, - input_pos=input_pos, + mask=mask.to(self.device), + input_pos=input_pos.to(self.device), hidden_state=hidden_state, dtype=self.dtype ) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index fa8b49392..c44944739 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -5,7 +5,7 @@ import re import json from pathlib import Path -from typing import Any, Dict +from typing import Any, Dict, Optional import torch import torch.nn as nn @@ -288,3 +288,44 @@ def llama3_mlp(dim: int, hidden_dim: int) -> FeedForward: up_proj = nn.Linear(dim, hidden_dim, bias=False) return FeedForward(gate_proj=gate_proj, down_proj=down_proj, up_proj=up_proj) + + +class InferenceState: + def __init__( + self, + tokens: Optional[torch.tensor] = None, + input_pos: Optional[torch.tensor] = None, + mask: Optional[torch.tensor] = None, + curr_pos: int = 0, + device: torch.device = torch.device("cpu") + ): + self.tokens = tokens + self.input_pos = input_pos + self.mask = mask + self.curr_pos = curr_pos + self.device = device + + def from_dict(self, state_dict): + 
""" + input_pos and mask are put on CPU until used + """ + self.tokens = torch.tensor(state_dict["tokens"]).to(self.device) + self.input_pos = torch.tensor(state_dict["input_pos"]).to("cpu") + self.mask = torch.tensor(state_dict["mask"]).to("cpu") + self.curr_pos = state_dict["curr_pos"] + + def to_dict(self) -> dict: + return { + "tokens": self.tokens.numpy(force=True).tolist(), + "input_pos": self.input_pos.numpy(force=True).tolist(), + "mask": self.mask.numpy(force=True).tolist(), + "curr_pos": self.curr_pos + } + + def __str__(self) -> str: + return f""" + tokens: {self.tokens} + input_pos: {self.input_pos} + mask: {self.mask} + curr_pos: {self.curr_pos} + """ \ No newline at end of file diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 9ec4b2fc2..16fb7271e 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -24,6 +24,7 @@ from exo.inference.torch.models.llm_utils import ( load_model_config, load_weights_torch, + InferenceState ) # supported models @@ -34,40 +35,8 @@ TEMP = 0.6 TOP_K = 35 -class InferenceState: - def __init__( - self, - tokens: Optional[torch.tensor], - input_pos: Optional[torch.tensor], - mask: Optional[torch.tensor], - curr_pos: int=0 - ): - self.tokens = tokens - self.input_pos = input_pos - self.mask = mask - self.curr_pos = curr_pos - - def from_dict(self, state_dict): - self.tokens = state_dict.tokens - self.input_pos = state_dict.input_pos - self.mask = state_dict.mask - self.curr_pos = state_dict.curr_pos - - def __dict__(self) -> dict: - return { - "tokens": self.tokens.numpy(force=True).tolist(), - "input_post": self.input_pos.numpy(force=True).tolist(), - "mask": self.mask.numpy(force=True).tolist(), - "curr_pos": self.curr_pos - } - - def __str__(self) -> str: - return f""" - tokens: {self.tokens} - input_pos: {self.input_pos} - mask: {self.mask} - curr_pos: {self.curr_pos} - """ +# max retries for infer on OOM errors +OOM_MAX_RETRY = 10 class TorchDynamicShardInferenceEngine(InferenceEngine): """ @@ -79,13 +48,11 @@ def __init__(self, shard_downloader: HFShardDownloader): self.sharded_model = None self.request_id = None self.executor = ThreadPoolExecutor(max_workers=1) - self.uuid = str(uuid.uuid4()) self.model_path = None self.model_config = None - - # current inference engine state - self.state = InferenceState() + self.state = None + self.oom_max_cnt = 0 # device settings if os.environ.get("TORCH_DEVICE"): @@ -97,14 +64,20 @@ def __init__(self, shard_downloader: HFShardDownloader): else: self.device = torch.device("cpu") + # rng setup for sampling self.rng = torch.Generator(device=self.device) self.rng.manual_seed(1234) def clear_model(self): """ Clear out model and shard - A way to avoid OOM as more prompts will just - stack in memory. OOM will be hit eventually for longer prompts. + A way to avoid OOM issues + + All prompts are stored in VRAM + while inference engine is up and using the same + model class instance, this will clear it for each prompt. + + OOM issue might occur in longer chats/contexts depending on your machine. 
""" if self.sharded_model.model.caches_are_enabled(): self.sharded_model.model.reset_caches() @@ -130,17 +103,19 @@ async def encode(self, shard: Shard, prompt: str) -> np.ndarray: await self.ensure_shard(shard) - # tokens = await asyncio.get_event_loop().run_in_executor( - # self.executor, - # functools.partial(self.tokenizer.encode, prompt, return_tensors="np"), - # ) - - def encode_wrapper() -> np.ndarry: + def encode_wrapper() -> np.ndarray: """ Encode the tensors from prompt along with the initial input_pos and mask """ - tokens = self.tokenizer.encode(prompt, return_tensors="np") + tokens = self.tokenizer.encode( + prompt, + return_tensors="pt" + ) + + # move to proper device, default is CPU + if tokens.device != self.device: + tokens = tokens.to(device=self.device) if DEBUG >= 4: print("encoded_wrapper called") @@ -151,7 +126,7 @@ def encode_wrapper() -> np.ndarry: max_gen_tokens = self.sharded_model.max_generated_tokens tokens = tokens[-max_gen_tokens:] - self.past_tokens = tokens + self.state.tokens = tokens bsz, tklng = tokens.size() total_response_length = tklng + self.sharded_model.max_generated_tokens @@ -234,19 +209,14 @@ async def sample(self, x: np.ndarray, temp=TEMP, top_k=TOP_K) -> np.ndarray: logits = torch.tensor(x).to(self.device) - q = torch.empty((logits.size(0), self.sharded_model.model.tok_embeddings.num_embeddings), device=logits.device).exponential_(1, generator=self.rng) - - def sample_wrapper(): + q = torch.empty((logits.size(0), self.sharded_model.model.tok_embeddings.num_embeddings), device=logits.device).exponential_(1, generator=self.rng) + tokens = ttg.sample(logits.clone(), temperature=temp, top_k=top_k, q=q.to(self.device)) + if DEBUG >= 4: print(f"tokens: {tokens}") - # clearing for non-primary nodes at end of processing - # if not self.shard.is_first_layer() and self.shard.is_last_layer(): - # print("CLEARING MODEL - INFER TENSOR NODE") - # self.clear_model() - return tokens.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(sample_wrapper)) @@ -266,7 +236,6 @@ async def infer_tensor( print("infer_tensor called") print(f"shard: {shard}") print(f"input_data: {input_data}") - print(f"inference_state: {inference_state}") if inference_state.get("tokens") is not None: self.state.from_dict(inference_state) @@ -279,7 +248,6 @@ async def infer_tensor( hidden_state = torch.tensor(input_data).to(self.device) elif input_data.ndim == 2: input_tensor = torch.tensor(input_data).to(self.device) - if self.state.tokens is not None: self.state.tokens = torch.cat([self.state.tokens, input_tensor], dim=-1).to(self.device) else: @@ -287,46 +255,53 @@ async def infer_tensor( def infer_wrapper(): if DEBUG >= 4: - print("infer_wrapper called") + print(f"infer_wrapper called [{self.oom_max_cnt} OOM]") print(f"self.state.tokens: {self.state.tokens}") print(f"hidden_state: {hidden_state}") model_cache = self.sharded_model.model.caches_are_enabled() - if self.state.curr_pos == 0: - # initial run - self.state.curr_pos = self.state.tokens.size(-1) - - if hidden_state is not None: - model_hs, model_logits = self.sharded_model.generate( - tokens=self.past_tokens, - hidden_state=hidden_state, - input_pos=self.state.input_pos, - mask=self.state.mask - ) - else: - if not model_cache: + try: + if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate( - tokens=self.past_tokens, + hidden_state=hidden_state, input_pos=self.state.input_pos, - mask=self.state.mask + mask=self.state.mask, + 
curr_pos=self.state.curr_pos ) else: - model_hs, model_logits = self.sharded_model.generate( - tokens=input_tensor, - input_pos=self.state.input_pos, - mask=self.state.mask - ) + if not model_cache: + model_hs, model_logits = self.sharded_model.generate( + tokens=self.past_tokens, + input_pos=self.state.input_pos, + mask=self.state.mask, + curr_pos=self.state.curr_pos + ) + else: + model_hs, model_logits = self.sharded_model.generate( + tokens=input_tensor, + input_pos=self.state.input_pos, + mask=self.state.mask, + curr_pos=self.state.curr_pos + ) + except Exception as err: + print(f"infer_tensor err\n{err}") + raise if model_hs is not None: return ( model_hs.numpy(force=True), - curr_inference_state, + self.state.to_dict(), ) + + if self.state.curr_pos == 0: + self.state.curr_pos = self.state.tokens.size(-1) + else: + self.state.curr_pos += 1 return ( model_logits[:, -1].numpy(force=True), - curr_inference_state, + self.state.to_dict(), ) return await asyncio.get_running_loop().run_in_executor(self.executor, infer_wrapper) @@ -343,6 +318,7 @@ async def ensure_shard(self, shard: Shard): return self.shard = shard + self.state = InferenceState(device=self.device) # download model safetensors and shard @@ -367,7 +343,7 @@ async def ensure_shard(self, shard: Shard): # load sharded weights await asyncio.get_running_loop().run_in_executor( self.executor, - functools.partial(load_weights_torch, self.model_path, self.sharded_model, self.model_config), + functools.partial(load_weights_torch, self.model_path, self.sharded_model.model, self.model_config), ) async def load_checkpoint(self, shard: Shard, path: str): From 754608a7d01728300b35e3779493526663b64d0e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 11:13:41 -0900 Subject: [PATCH 553/589] trying oom tweaks --- exo/inference/torch/models/llama3.py | 4 +- exo/inference/torch/models/llm_utils.py | 6 +-- .../torch/sharded_inference_engine.py | 38 +++++++++++-------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index eaf6734b9..f98aeb5d7 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -388,8 +388,8 @@ def generate( model_output = self.model( tokens=tokens, - mask=mask.to(self.device), - input_pos=input_pos.to(self.device), + mask=mask, + input_pos=input_pos, hidden_state=hidden_state, dtype=self.dtype ) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index c44944739..427fc4626 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -307,11 +307,11 @@ def __init__( def from_dict(self, state_dict): """ - input_pos and mask are put on CPU until used + Data is stored as torch tensors on needed devices """ self.tokens = torch.tensor(state_dict["tokens"]).to(self.device) - self.input_pos = torch.tensor(state_dict["input_pos"]).to("cpu") - self.mask = torch.tensor(state_dict["mask"]).to("cpu") + self.input_pos = torch.tensor(state_dict["input_pos"]).to(self.device) + self.mask = torch.tensor(state_dict["mask"]).to(self.device) self.curr_pos = state_dict["curr_pos"] def to_dict(self) -> dict: diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 16fb7271e..52628ea12 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -35,9 +35,6 @@ TEMP = 0.6 TOP_K = 35 -# max retries for infer 
on OOM errors -OOM_MAX_RETRY = 10 - class TorchDynamicShardInferenceEngine(InferenceEngine): """ Pytorch based inferece engine for sharded models @@ -52,7 +49,7 @@ def __init__(self, shard_downloader: HFShardDownloader): self.model_path = None self.model_config = None self.state = None - self.oom_max_cnt = 0 + self.oom_cnt = 0 # device settings if os.environ.get("TORCH_DEVICE"): @@ -89,7 +86,7 @@ def clear_model(self): torch.cuda.empty_cache() self.shard = None - self.past_tokens = None + self.state = None async def encode(self, shard: Shard, prompt: str) -> np.ndarray: if DEBUG >= 4: @@ -239,7 +236,6 @@ async def infer_tensor( if inference_state.get("tokens") is not None: self.state.from_dict(inference_state) - self.state.tokens = torch.tensor(self.state.tokens).to(self.device) self.request_id = request_id if not self.request_id else self.request_id @@ -249,13 +245,16 @@ async def infer_tensor( elif input_data.ndim == 2: input_tensor = torch.tensor(input_data).to(self.device) if self.state.tokens is not None: - self.state.tokens = torch.cat([self.state.tokens, input_tensor], dim=-1).to(self.device) + self.state.tokens = torch.cat([ + self.state.tokens.to(self.device), + input_tensor + ], dim=-1).to(self.device) else: self.state.tokens = input_tensor.clone() def infer_wrapper(): if DEBUG >= 4: - print(f"infer_wrapper called [{self.oom_max_cnt} OOM]") + print(f"infer_wrapper called [{self.oom_cnt} OOM]") print(f"self.state.tokens: {self.state.tokens}") print(f"hidden_state: {hidden_state}") @@ -265,25 +264,30 @@ def infer_wrapper(): if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate( hidden_state=hidden_state, - input_pos=self.state.input_pos, - mask=self.state.mask, + input_pos=self.state.input_pos.to(self.device), + mask=self.state.mask.to(self.device), curr_pos=self.state.curr_pos ) else: if not model_cache: model_hs, model_logits = self.sharded_model.generate( - tokens=self.past_tokens, - input_pos=self.state.input_pos, - mask=self.state.mask, + tokens=self.state.tokens.to(self.device), + input_pos=self.state.input_pos.to(self.device), + mask=self.state.mask.to(self.device), curr_pos=self.state.curr_pos ) else: model_hs, model_logits = self.sharded_model.generate( tokens=input_tensor, - input_pos=self.state.input_pos, - mask=self.state.mask, + input_pos=self.state.input_pos.to(self.device), + mask=self.state.mask.to(self.device), curr_pos=self.state.curr_pos ) + except torch.cuda.OutOfMemoryError: + print(f"OOM on cuda, clearing model and stopping") + self.oom_cnt += 1 + self.clear_model() + return except Exception as err: print(f"infer_tensor err\n{err}") raise @@ -318,7 +322,9 @@ async def ensure_shard(self, shard: Shard): return self.shard = shard - self.state = InferenceState(device=self.device) + + # Using CPU to store inference state + self.state = InferenceState() # download model safetensors and shard From 81a27c569d95075090614a1abe7a4c355a083dbc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 11:23:07 -0900 Subject: [PATCH 554/589] Adding two new requirements not in fork main new updates --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index 0e666e3df..173d8a538 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,9 @@ "torchao==0.8.0", "pytest==8.3.3", "pytest-asyncio==0.24.0", + "scapy==2.6.1", + "uvloop==0.21.0" + ] extras_require = { From 192f0c50d05237abf27207f0c8c9e0a54c1b9329 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 12:58:11 -0900 Subject: [PATCH 555/589] fixing 
numpy no bfloat16 support issue --- exo/inference/torch/sharded_inference_engine.py | 8 ++++++++ exo/inference/torch/tests/test_llama3_full.py | 5 ++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 52628ea12..45a5fea07 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -293,6 +293,10 @@ def infer_wrapper(): raise if model_hs is not None: + # numpy current no support for bf16 + if model_hs.dtype == torch.bfloat16: + model_hs = model_hs.float() + return ( model_hs.numpy(force=True), self.state.to_dict(), @@ -303,6 +307,10 @@ def infer_wrapper(): else: self.state.curr_pos += 1 + # numpy current no support for bf16 + if model_logits.dtype == torch.bfloat16: + model_logits = model_logits.float() + return ( model_logits[:, -1].numpy(force=True), self.state.to_dict(), diff --git a/exo/inference/torch/tests/test_llama3_full.py b/exo/inference/torch/tests/test_llama3_full.py index e3fa4e7f2..fca6b8829 100644 --- a/exo/inference/torch/tests/test_llama3_full.py +++ b/exo/inference/torch/tests/test_llama3_full.py @@ -285,5 +285,8 @@ def normal_full(model, user_prompt: str, device: torch.device = torch.device("cp # load_model_weights_torchtune(cache_dir, shard_1, shard_model_1) load_weights_torch(cache_dir, shard_model_1.model, config) - main(shard_model_1, prompt, device, config["torch_dtype"]) + + import time + time.sleep(5) + # main(shard_model_1, prompt, device, config["torch_dtype"]) # normal_full(shard_model_1, prompt, device) From fea4b3103bff411497e53e6506fff32e71b7774f Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 13:12:51 -0900 Subject: [PATCH 556/589] fixing numpy no bfloat16 support issue --- exo/inference/torch/models/llm_utils.py | 2 +- exo/inference/torch/sharded_inference_engine.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/exo/inference/torch/models/llm_utils.py b/exo/inference/torch/models/llm_utils.py index 427fc4626..d3d8754c6 100644 --- a/exo/inference/torch/models/llm_utils.py +++ b/exo/inference/torch/models/llm_utils.py @@ -290,7 +290,7 @@ def llama3_mlp(dim: int, hidden_dim: int) -> FeedForward: return FeedForward(gate_proj=gate_proj, down_proj=down_proj, up_proj=up_proj) -class InferenceState: +class ShardInferenceState: def __init__( self, tokens: Optional[torch.tensor] = None, diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 45a5fea07..e4e388de6 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -24,7 +24,7 @@ from exo.inference.torch.models.llm_utils import ( load_model_config, load_weights_torch, - InferenceState + ShardInferenceState ) # supported models @@ -297,6 +297,13 @@ def infer_wrapper(): if model_hs.dtype == torch.bfloat16: model_hs = model_hs.float() + if DEBUG >= 4: + print("sending hidden states") + print(f"model_hs: {model_hs.size()}") + print(f"state.tokens: {self.state.tokens}") + print(f"state.input_pos: {self.state.input_pos.size()}") + print(f"state.mask: {self.state.mask.size()}") + return ( model_hs.numpy(force=True), self.state.to_dict(), @@ -332,7 +339,7 @@ async def ensure_shard(self, shard: Shard): self.shard = shard # Using CPU to store inference state - self.state = InferenceState() + self.state = ShardInferenceState() # download model safetensors and shard From 
ffd2907b7ef2d530ac740c9e347c104da616e33a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 13:16:43 -0900 Subject: [PATCH 557/589] debugging grpc peer handle issue for serialization of inference state --- exo/networking/grpc/grpc_peer_handle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/networking/grpc/grpc_peer_handle.py b/exo/networking/grpc/grpc_peer_handle.py index 6b3e27589..1c2831e38 100644 --- a/exo/networking/grpc/grpc_peer_handle.py +++ b/exo/networking/grpc/grpc_peer_handle.py @@ -201,6 +201,7 @@ def serialize_inference_state(self, inference_state: dict) -> node_service_pb2.I proto_inference_state = node_service_pb2.InferenceState() other_data = {} for k, v in inference_state.items(): + print(f"k {k}\nv {v}") if isinstance(v, mx.array): np_array = np.array(v) tensor_data = node_service_pb2.Tensor(tensor_data=np_array.tobytes(), shape=list(np_array.shape), dtype=str(np_array.dtype)) From 726ef0ddd0bfbc4e5b8f129739cb586a7bc0ce6b Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 13:27:46 -0900 Subject: [PATCH 558/589] adding in bool to check if apple or not for checking array type --- exo/networking/grpc/grpc_peer_handle.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/exo/networking/grpc/grpc_peer_handle.py b/exo/networking/grpc/grpc_peer_handle.py index 1c2831e38..7c12b4b82 100644 --- a/exo/networking/grpc/grpc_peer_handle.py +++ b/exo/networking/grpc/grpc_peer_handle.py @@ -16,8 +16,10 @@ if platform.system().lower() == "darwin" and platform.machine().lower() == "arm64": import mlx.core as mx + IS_APPLE = True else: import numpy as mx + IS_APPLE = False class GRPCPeerHandle(PeerHandle): @@ -201,8 +203,8 @@ def serialize_inference_state(self, inference_state: dict) -> node_service_pb2.I proto_inference_state = node_service_pb2.InferenceState() other_data = {} for k, v in inference_state.items(): - print(f"k {k}\nv {v}") - if isinstance(v, mx.array): + mx_array_type = mx.array if IS_APPLE else mx.ndarray + if isinstance(v, mx_array_type): np_array = np.array(v) tensor_data = node_service_pb2.Tensor(tensor_data=np_array.tobytes(), shape=list(np_array.shape), dtype=str(np_array.dtype)) proto_inference_state.tensor_data[k].CopyFrom(tensor_data) From f24397f6b7cea78c2e09f9e15ca160afb28d858e Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 13:30:17 -0900 Subject: [PATCH 559/589] fixing other mx.array call --- exo/networking/grpc/grpc_peer_handle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/networking/grpc/grpc_peer_handle.py b/exo/networking/grpc/grpc_peer_handle.py index 7c12b4b82..e7999637e 100644 --- a/exo/networking/grpc/grpc_peer_handle.py +++ b/exo/networking/grpc/grpc_peer_handle.py @@ -208,7 +208,7 @@ def serialize_inference_state(self, inference_state: dict) -> node_service_pb2.I np_array = np.array(v) tensor_data = node_service_pb2.Tensor(tensor_data=np_array.tobytes(), shape=list(np_array.shape), dtype=str(np_array.dtype)) proto_inference_state.tensor_data[k].CopyFrom(tensor_data) - elif isinstance(v, list) and all(isinstance(item, mx.array) for item in v): + elif isinstance(v, list) and all(isinstance(item, mx_array_type) for item in v): tensor_list = node_service_pb2.TensorList() for tensor in v: np_array = np.array(tensor) From 6bbbb046e9a9f83165f0663f8b978a9c34424ca1 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 13:42:14 -0900 Subject: [PATCH 560/589] adding tokens pass to hidden state passing --- 
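The platform check introduced here reduces to a small, reusable pattern: decide the array backend once at import time and derive the isinstance target from it. Below is a minimal sketch under the assumption that mlx is only importable on Apple Silicon; the to_numpy helper and the sample values are illustrative additions, not code from this patch.

import platform
import numpy as np

# Pick the backend once; mlx is assumed to be available only on Apple Silicon.
IS_APPLE = platform.system().lower() == "darwin" and platform.machine().lower() == "arm64"
if IS_APPLE:
  import mlx.core as mx
  ARRAY_TYPE = mx.array    # mlx's tensor type
else:
  import numpy as mx
  ARRAY_TYPE = mx.ndarray  # numpy's equivalent

def to_numpy(value):
  # Normalize either backend's array to a plain numpy array before serializing.
  return np.array(value) if isinstance(value, ARRAY_TYPE) else value

print(to_numpy(mx.zeros((2, 2))))
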
exo/inference/torch/sharded_inference_engine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index e4e388de6..bab50210b 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -263,6 +263,7 @@ def infer_wrapper(): try: if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate( + tokens=self.state.tokens.to(self.device), hidden_state=hidden_state, input_pos=self.state.input_pos.to(self.device), mask=self.state.mask.to(self.device), From 0d5779ef55b8f3dbb7d84e731e35f87507ad0a50 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 13:49:12 -0900 Subject: [PATCH 561/589] fixing type conversion and device conversion --- .../torch/sharded_inference_engine.py | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index bab50210b..4a192a1f3 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -241,9 +241,16 @@ async def infer_tensor( hidden_state = None if input_data.ndim == 3: - hidden_state = torch.tensor(input_data).to(self.device) + hidden_state = torch.tensor(input_data).to( + device=self.device, + dtype=self.model_config["torch_dtype"] + ) elif input_data.ndim == 2: - input_tensor = torch.tensor(input_data).to(self.device) + input_tensor = torch.tensor(input_data).to( + device=self.device, + dtype=self.model_config["torch_dtype"] + ) + if self.state.tokens is not None: self.state.tokens = torch.cat([ self.state.tokens.to(self.device), @@ -261,27 +268,42 @@ def infer_wrapper(): model_cache = self.sharded_model.model.caches_are_enabled() try: + in_tokens = self.state.tokens.clone().to( + device=self.device, + dtype=self.model_config["torch_dtype"] + ) + + in_input_pos = self.state.input_pos.to( + device=self.device, + dtype=self.model_config["torch_dtype"] + ) + + in_mask = self.state.mask.to( + device=self.device, + dtype=self.model_config["torch_dtype"] + ) + if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate( - tokens=self.state.tokens.to(self.device), + tokens=in_tokens hidden_state=hidden_state, - input_pos=self.state.input_pos.to(self.device), - mask=self.state.mask.to(self.device), + input_pos=in_input_pos, + mask=in_mask, curr_pos=self.state.curr_pos ) else: if not model_cache: model_hs, model_logits = self.sharded_model.generate( - tokens=self.state.tokens.to(self.device), - input_pos=self.state.input_pos.to(self.device), - mask=self.state.mask.to(self.device), + tokens=in_tokens, + input_pos=in_input_pos, + mask=in_mask, curr_pos=self.state.curr_pos ) else: model_hs, model_logits = self.sharded_model.generate( - tokens=input_tensor, - input_pos=self.state.input_pos.to(self.device), - mask=self.state.mask.to(self.device), + tokens=in_tokens, + input_pos=in_input_pos, + mask=in_mask, curr_pos=self.state.curr_pos ) except torch.cuda.OutOfMemoryError: From fe1e8ef53a5fed3849b8870ff11b052b9763bab1 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 13:50:46 -0900 Subject: [PATCH 562/589] fixing types and adding clones --- .../torch/sharded_inference_engine.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 
4a192a1f3..d1c4e088d 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -247,8 +247,7 @@ async def infer_tensor( ) elif input_data.ndim == 2: input_tensor = torch.tensor(input_data).to( - device=self.device, - dtype=self.model_config["torch_dtype"] + device=self.device ) if self.state.tokens is not None: @@ -269,23 +268,20 @@ def infer_wrapper(): try: in_tokens = self.state.tokens.clone().to( - device=self.device, - dtype=self.model_config["torch_dtype"] + device=self.device ) - in_input_pos = self.state.input_pos.to( - device=self.device, - dtype=self.model_config["torch_dtype"] + in_input_pos = self.state.input_pos.clone().to( + device=self.device ) - in_mask = self.state.mask.to( - device=self.device, - dtype=self.model_config["torch_dtype"] + in_mask = self.state.mask.clone().to( + device=self.device ) if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate( - tokens=in_tokens + tokens=in_tokens, hidden_state=hidden_state, input_pos=in_input_pos, mask=in_mask, From f43af1bbcdf5369b32e48df2584c6e9d75232bc1 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 13:57:39 -0900 Subject: [PATCH 563/589] tensor shape error --- .../torch/sharded_inference_engine.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index d1c4e088d..0857b4925 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -288,20 +288,20 @@ def infer_wrapper(): curr_pos=self.state.curr_pos ) else: - if not model_cache: - model_hs, model_logits = self.sharded_model.generate( - tokens=in_tokens, - input_pos=in_input_pos, - mask=in_mask, - curr_pos=self.state.curr_pos - ) - else: - model_hs, model_logits = self.sharded_model.generate( - tokens=in_tokens, - input_pos=in_input_pos, - mask=in_mask, - curr_pos=self.state.curr_pos - ) + # if not model_cache: + # model_hs, model_logits = self.sharded_model.generate( + # tokens=in_tokens, + # input_pos=in_input_pos, + # mask=in_mask, + # curr_pos=self.state.curr_pos + # ) + # else: + model_hs, model_logits = self.sharded_model.generate( + tokens=input_tensor, + input_pos=in_input_pos, + mask=in_mask, + curr_pos=self.state.curr_pos + ) except torch.cuda.OutOfMemoryError: print(f"OOM on cuda, clearing model and stopping") self.oom_cnt += 1 From 9ec9b23e01a3b78363ac5b1eb3b2a574f4360fb0 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 14:04:32 -0900 Subject: [PATCH 564/589] tensor shape error --- exo/inference/torch/models/llama3.py | 32 +++++++++++++++------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index f98aeb5d7..21f74faf3 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -360,22 +360,23 @@ def generate( self.model.output_hidden_states = [self.shard.end_layer] - if curr_pos > 0: - if self.model.caches_are_enabled(): - input_pos = input_pos[:, curr_pos].contiguous() - mask = mask[:, curr_pos, None, :].contiguous() + if hidden_state is not None: + if curr_pos > 0: + if self.model.caches_are_enabled(): + input_pos = input_pos[:, curr_pos].contiguous() + mask = mask[:, curr_pos, None, :].contiguous() + else: + input_pos = input_pos[:, :curr_pos + 1] + mask = mask[:, :curr_pos + 1, :curr_pos + 1] else: - input_pos = 
input_pos[:, :curr_pos + 1] - mask = mask[:, :curr_pos + 1, :curr_pos + 1] - else: - _, tklng = tokens.size() + _, tklng = tokens.size() - if self.model.caches_are_enabled(): - mask = mask[:, :tklng] - else: - mask = mask[:, :tklng, :tklng] + if self.model.caches_are_enabled(): + mask = mask[:, :tklng] + else: + mask = mask[:, :tklng, :tklng] - input_pos = input_pos[:, :tklng].squeeze() + input_pos = input_pos[:, :tklng].squeeze() if DEBUG >= 4: print("model_input") @@ -386,11 +387,12 @@ def generate( print(f"mask: {mask}") print(f"input_pos: {input_pos}") + model_output = self.model( - tokens=tokens, + tokens=tokens if hidden_state is None else None, mask=mask, input_pos=input_pos, - hidden_state=hidden_state, + hidden_state=hidden_state if hidden_state is not None else None, dtype=self.dtype ) From 2f31a7bb2f3e46e93d11b0d429ebc2c5c7d4a630 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 14:08:21 -0900 Subject: [PATCH 565/589] tensor shape error --- exo/inference/torch/models/llama3.py | 4 +-- .../torch/sharded_inference_engine.py | 31 +++++++++---------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 21f74faf3..25c4f27e9 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -389,10 +389,10 @@ def generate( model_output = self.model( - tokens=tokens if hidden_state is None else None, + tokens=tokens, mask=mask, input_pos=input_pos, - hidden_state=hidden_state if hidden_state is not None else None, + hidden_state=hidden_state, dtype=self.dtype ) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 0857b4925..7bad37c4b 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -281,27 +281,24 @@ def infer_wrapper(): if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate( - tokens=in_tokens, hidden_state=hidden_state, - input_pos=in_input_pos, - mask=in_mask, curr_pos=self.state.curr_pos ) else: - # if not model_cache: - # model_hs, model_logits = self.sharded_model.generate( - # tokens=in_tokens, - # input_pos=in_input_pos, - # mask=in_mask, - # curr_pos=self.state.curr_pos - # ) - # else: - model_hs, model_logits = self.sharded_model.generate( - tokens=input_tensor, - input_pos=in_input_pos, - mask=in_mask, - curr_pos=self.state.curr_pos - ) + if not model_cache: + model_hs, model_logits = self.sharded_model.generate( + tokens=in_tokens, + input_pos=in_input_pos, + mask=in_mask, + curr_pos=self.state.curr_pos + ) + else: + model_hs, model_logits = self.sharded_model.generate( + tokens=input_tensor, + input_pos=in_input_pos, + mask=in_mask, + curr_pos=self.state.curr_pos + ) except torch.cuda.OutOfMemoryError: print(f"OOM on cuda, clearing model and stopping") self.oom_cnt += 1 From b7cfece6127677be5913b4bcabd9b1d8973dcecc Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 14:13:53 -0900 Subject: [PATCH 566/589] turning back on mask calculation --- exo/inference/torch/models/llama3.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 25c4f27e9..aa2c353da 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -360,23 +360,22 @@ def generate( self.model.output_hidden_states = [self.shard.end_layer] - if 
hidden_state is not None: - if curr_pos > 0: - if self.model.caches_are_enabled(): - input_pos = input_pos[:, curr_pos].contiguous() - mask = mask[:, curr_pos, None, :].contiguous() - else: - input_pos = input_pos[:, :curr_pos + 1] - mask = mask[:, :curr_pos + 1, :curr_pos + 1] + if curr_pos > 0: + if self.model.caches_are_enabled(): + input_pos = input_pos[:, curr_pos].contiguous() + mask = mask[:, curr_pos, None, :].contiguous() else: - _, tklng = tokens.size() + input_pos = input_pos[:, :curr_pos + 1] + mask = mask[:, :curr_pos + 1, :curr_pos + 1] + else: + _, tklng = tokens.size() - if self.model.caches_are_enabled(): - mask = mask[:, :tklng] - else: - mask = mask[:, :tklng, :tklng] + if self.model.caches_are_enabled(): + mask = mask[:, :tklng] + else: + mask = mask[:, :tklng, :tklng] - input_pos = input_pos[:, :tklng].squeeze() + input_pos = input_pos[:, :tklng].squeeze() if DEBUG >= 4: print("model_input") From 332ed2a35c264f3654a9e7708474f87369370baf Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 14:16:17 -0900 Subject: [PATCH 567/589] tensor none called --- exo/inference/torch/sharded_inference_engine.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 7bad37c4b..557875e6a 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -281,7 +281,10 @@ def infer_wrapper(): if hidden_state is not None: model_hs, model_logits = self.sharded_model.generate( + tokens=in_tokens, hidden_state=hidden_state, + input_pos=in_input_pos, + mask=in_mask, curr_pos=self.state.curr_pos ) else: From 983c3415813e2d1b9f8a1fac998bb45af3034ac0 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 14:24:45 -0900 Subject: [PATCH 568/589] tensor repeating in state cache --- exo/inference/torch/sharded_inference_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 557875e6a..f7c609ce0 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -250,7 +250,7 @@ async def infer_tensor( device=self.device ) - if self.state.tokens is not None: + if self.state.tokens is not None and input_tensor.size(-1) == 1: self.state.tokens = torch.cat([ self.state.tokens.to(self.device), input_tensor From 73e13b4f2c62fe12d4c39770845750c3eff36371 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 14:31:48 -0900 Subject: [PATCH 569/589] tensor repeating in state cache --- exo/inference/torch/sharded_inference_engine.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index f7c609ce0..aa63d87d0 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -214,6 +214,14 @@ def sample_wrapper(): if DEBUG >= 4: print(f"tokens: {tokens}") + if self.state.tokens is not None: + self.state.tokens = torch.cat([ + self.state.tokens.to(self.device), + tokens.clone() + ], dim=-1).to(self.device) + else: + self.state.tokens = tokens.clone() + return tokens.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(sample_wrapper)) @@ -249,14 +257,6 @@ async def infer_tensor( input_tensor = 
torch.tensor(input_data).to( device=self.device ) - - if self.state.tokens is not None and input_tensor.size(-1) == 1: - self.state.tokens = torch.cat([ - self.state.tokens.to(self.device), - input_tensor - ], dim=-1).to(self.device) - else: - self.state.tokens = input_tensor.clone() def infer_wrapper(): if DEBUG >= 4: From f65766ec62ae9c5f861c2153086508d5f018d2e9 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 14:41:29 -0900 Subject: [PATCH 570/589] hidden state issues --- exo/inference/torch/models/llama3.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index aa2c353da..41b7e8d93 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -169,7 +169,7 @@ def forward( curr_layers = [self.layers[i] for i in range(self.shard.start_layer, self.shard.end_layer + 1)] for i, layer in enumerate(curr_layers): if DEBUG >= 8: - print(f"\nhidden layer in H[{i}]\n{h}") + print(f"\nhidden layer in H[{self.shard.start_layer+i}]\n{h}") print(f"\nmask\n{mask}\ninput_pos\n{input_pos}") print(f"\noutput_hidden_states\n{self.output_hidden_states}\n") @@ -178,11 +178,16 @@ def forward( # Process through each transformer layer # with torch.no_grad(): - h = layer( - h, - mask=mask, - input_pos=input_pos, - ) + if hidden_state is not None: + h = layer( + h + ) + else: + h = layer( + h, + mask=mask, + input_pos=input_pos, + ) # if i in self.output_hidden_states: From f80597cc0b1443550091ea7562d936e62eed5ff5 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 14:43:19 -0900 Subject: [PATCH 571/589] hidden state issues --- exo/inference/torch/models/llama3.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 41b7e8d93..6694ceb96 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -178,16 +178,11 @@ def forward( # Process through each transformer layer # with torch.no_grad(): - if hidden_state is not None: - h = layer( - h - ) - else: - h = layer( - h, - mask=mask, - input_pos=input_pos, - ) + h = layer( + h, + mask=mask, + input_pos=input_pos, + ) # if i in self.output_hidden_states: @@ -366,12 +361,12 @@ def generate( self.model.output_hidden_states = [self.shard.end_layer] if curr_pos > 0: - if self.model.caches_are_enabled(): - input_pos = input_pos[:, curr_pos].contiguous() - mask = mask[:, curr_pos, None, :].contiguous() - else: - input_pos = input_pos[:, :curr_pos + 1] - mask = mask[:, :curr_pos + 1, :curr_pos + 1] + # if self.model.caches_are_enabled(): + # input_pos = input_pos[:, curr_pos].contiguous() + # mask = mask[:, curr_pos, None, :].contiguous() + # else: + input_pos = input_pos[:, :curr_pos + 1] + mask = mask[:, :curr_pos + 1, :curr_pos + 1] else: _, tklng = tokens.size() From 58a7d3c372683d6f363fa72002a3a7b1d99492f4 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 14:46:23 -0900 Subject: [PATCH 572/589] use_cache fix --- exo/inference/torch/sharded_inference_engine.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index aa63d87d0..28645f7b7 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -51,6 +51,9 @@ def __init__(self, 
shard_downloader: HFShardDownloader): self.state = None self.oom_cnt = 0 + # cache settings + self.use_cache = bool(os.getenv("TORCH_USE_CACHE", "True").lower() == "true") + # device settings if os.environ.get("TORCH_DEVICE"): self.device = torch.device(os.environ["TORCH_DEVICE"]) @@ -129,7 +132,7 @@ def encode_wrapper() -> np.ndarray: total_response_length = tklng + self.sharded_model.max_generated_tokens # setup cache - if not self.sharded_model.model.caches_are_enabled(): + if not self.sharded_model.model.caches_are_enabled() and self.use_cache: with self.device: self.sharded_model.model.setup_caches( bsz, @@ -376,7 +379,7 @@ async def ensure_shard(self, shard: Shard): shard=shard, device=self.device, dtype=self.model_config["torch_dtype"], - use_cache=bool(os.getenv("TORCH_USE_CACHE", "True").lower() == "true"), + use_cache=self.use_cache, ), ) From efcb5b9d8731a6ef86596785d93f37886931cb7a Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 14:53:17 -0900 Subject: [PATCH 573/589] model load issue --- exo/inference/torch/models/llama3.py | 12 ++++----- .../torch/sharded_inference_engine.py | 25 +++++++++++-------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 6694ceb96..9f78db4dd 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -361,12 +361,12 @@ def generate( self.model.output_hidden_states = [self.shard.end_layer] if curr_pos > 0: - # if self.model.caches_are_enabled(): - # input_pos = input_pos[:, curr_pos].contiguous() - # mask = mask[:, curr_pos, None, :].contiguous() - # else: - input_pos = input_pos[:, :curr_pos + 1] - mask = mask[:, :curr_pos + 1, :curr_pos + 1] + if self.model.caches_are_enabled(): + input_pos = input_pos[:, curr_pos].contiguous() + mask = mask[:, curr_pos, None, :].contiguous() + else: + input_pos = input_pos[:, :curr_pos + 1] + mask = mask[:, :curr_pos + 1, :curr_pos + 1] else: _, tklng = tokens.size() diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 28645f7b7..914de10a1 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -370,23 +370,28 @@ async def ensure_shard(self, shard: Shard): # self.tokenizer = await _resolve_tokenizer(model_path) self.tokenizer = await _resolve_tokenizer(self.model_path) - - self.sharded_model = await asyncio.get_running_loop().run_in_executor( - self.executor, - functools.partial( - ShardedLlamaModel, + + def start_model(): + if DEBUG >= 4: + print("start_model called") + + self.sharded_model = ShardedLlamaModel( config=self.model_config, shard=shard, device=self.device, dtype=self.model_config["torch_dtype"], - use_cache=self.use_cache, - ), - ) + use_cache=self.use_cache + ) - # load sharded weights + load_weights_torch( + self.model_path, + self.sharded_model.model, + self.model_config + ) + await asyncio.get_running_loop().run_in_executor( self.executor, - functools.partial(load_weights_torch, self.model_path, self.sharded_model.model, self.model_config), + functools.partial(start_model), ) async def load_checkpoint(self, shard: Shard, path: str): From 4bf752b5b2221aff9d5cb0e56d3355e2250ac1c8 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 15:00:37 -0900 Subject: [PATCH 574/589] pass token issue --- exo/inference/torch/models/llama3.py | 2 +- exo/inference/torch/sharded_inference_engine.py | 16 ++++++++-------- 2 files 
changed, 9 insertions(+), 9 deletions(-) diff --git a/exo/inference/torch/models/llama3.py b/exo/inference/torch/models/llama3.py index 9f78db4dd..b8159cfb2 100644 --- a/exo/inference/torch/models/llama3.py +++ b/exo/inference/torch/models/llama3.py @@ -189,7 +189,7 @@ def forward( # hidden.append(h) if DEBUG >= 8: - print(f"\nhidden layer out H[{i}]->H[{i + 1}]\n{h}\n") + print(f"\nhidden layer out H[{self.shard.start_layer+i}]->H[{self.shard.start_layer+i+1}]\n{h}\n") if self.shard.is_last_layer(): # Apply normalization diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index 914de10a1..aa21565c1 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -217,14 +217,6 @@ def sample_wrapper(): if DEBUG >= 4: print(f"tokens: {tokens}") - if self.state.tokens is not None: - self.state.tokens = torch.cat([ - self.state.tokens.to(self.device), - tokens.clone() - ], dim=-1).to(self.device) - else: - self.state.tokens = tokens.clone() - return tokens.numpy(force=True) return await asyncio.get_running_loop().run_in_executor(self.executor, functools.partial(sample_wrapper)) @@ -269,6 +261,14 @@ def infer_wrapper(): model_cache = self.sharded_model.model.caches_are_enabled() + if self.state.tokens is not None: + self.state.tokens = torch.cat([ + self.state.tokens.to(self.device), + [in_tokens[:, -1]] + ], dim=-1).to(self.device) + else: + self.state.tokens = in_tokens.clone() + try: in_tokens = self.state.tokens.clone().to( device=self.device From 7cc42f174878ebed13883e5f6e3719b508c1410d Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 15:03:32 -0900 Subject: [PATCH 575/589] pass token issue --- exo/inference/torch/sharded_inference_engine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index aa21565c1..aa5f37faa 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -261,13 +261,13 @@ def infer_wrapper(): model_cache = self.sharded_model.model.caches_are_enabled() - if self.state.tokens is not None: + if self.state.tokens is not None and input_tensor.size(-1) == 1: self.state.tokens = torch.cat([ self.state.tokens.to(self.device), - [in_tokens[:, -1]] + input_tensor.clone() ], dim=-1).to(self.device) else: - self.state.tokens = in_tokens.clone() + self.state.tokens = input_tensor.clone() try: in_tokens = self.state.tokens.clone().to( From 4e3e53ecbe1518c9de98d212da7c0c4b50689115 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 15:05:44 -0900 Subject: [PATCH 576/589] pass token issue --- exo/inference/torch/sharded_inference_engine.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/exo/inference/torch/sharded_inference_engine.py b/exo/inference/torch/sharded_inference_engine.py index aa5f37faa..b0b0ea258 100644 --- a/exo/inference/torch/sharded_inference_engine.py +++ b/exo/inference/torch/sharded_inference_engine.py @@ -243,6 +243,7 @@ async def infer_tensor( self.request_id = request_id if not self.request_id else self.request_id hidden_state = None + input_tensor = None if input_data.ndim == 3: hidden_state = torch.tensor(input_data).to( device=self.device, @@ -261,11 +262,12 @@ def infer_wrapper(): model_cache = self.sharded_model.model.caches_are_enabled() - if self.state.tokens is not None and input_tensor.size(-1) == 1: - 
self.state.tokens = torch.cat([ - self.state.tokens.to(self.device), - input_tensor.clone() - ], dim=-1).to(self.device) + if self.state.tokens is not None: + if input_data.ndim == 2 and input_tensor.size(-1) == 1: + self.state.tokens = torch.cat([ + self.state.tokens.to(self.device), + input_tensor.clone() + ], dim=-1).to(self.device) else: self.state.tokens = input_tensor.clone() From a09956e9b66ab0aa46f561b374fd23054d61e768 Mon Sep 17 00:00:00 2001 From: risingsunomi Date: Fri, 24 Jan 2025 16:28:37 -0900 Subject: [PATCH 577/589] removing screenshot --- 2025-01-16-102425_3840x1080_scrot.png | Bin 688253 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 2025-01-16-102425_3840x1080_scrot.png diff --git a/2025-01-16-102425_3840x1080_scrot.png b/2025-01-16-102425_3840x1080_scrot.png deleted file mode 100644 index fbe69540ced9bd3ff3f05009ecd5c0dbb96a7ec6..0000000000000000000000000000000000000000
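Note on the recurring pattern in patches 561-576 above: the changes keep adjusting two things in infer_tensor, (1) moving incoming arrays onto the engine's device, casting only 3-D hidden states to the model dtype while leaving 2-D token ids as integers, and (2) appending a newly sampled token to self.state.tokens only when the incoming tensor holds a single token. The short standalone sketch below illustrates that pattern under simplified, assumed names (InferenceState, prepare_input, accumulate_tokens); it is not code from the patch series and omits the real ShardedLlamaModel and engine classes.

from dataclasses import dataclass
from typing import Optional, Tuple

import numpy as np
import torch


@dataclass
class InferenceState:
  # hypothetical stand-in for the engine's per-request state
  tokens: Optional[torch.Tensor] = None


def prepare_input(input_data: np.ndarray, device: torch.device,
                  dtype: torch.dtype) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor]]:
  # Mirrors the ndim check in infer_tensor: 3-D arrays are hidden states
  # (cast to the model dtype), 2-D arrays are token ids (kept as integers).
  t = torch.tensor(input_data)
  if input_data.ndim == 3:
    return None, t.to(device=device, dtype=dtype)  # hidden state
  return t.to(device=device), None  # token ids


def accumulate_tokens(state: InferenceState, input_tensor: torch.Tensor,
                      device: torch.device) -> torch.Tensor:
  # Append a single newly sampled token to the cached history; otherwise
  # (first call, or a full prompt) replace the history with a clone.
  if state.tokens is not None and input_tensor.size(-1) == 1:
    state.tokens = torch.cat([state.tokens.to(device), input_tensor.clone()], dim=-1)
  else:
    state.tokens = input_tensor.clone()
  return state.tokens


if __name__ == "__main__":
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  state = InferenceState()
  prompt_ids, _ = prepare_input(np.array([[1, 2, 3, 4]]), device, torch.float16)
  accumulate_tokens(state, prompt_ids, device)
  next_id, _ = prepare_input(np.array([[5]]), device, torch.float16)
  print(accumulate_tokens(state, next_id, device))  # -> tensor([[1, 2, 3, 4, 5]])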