* Zoo Model

* Remove optional import zoo

* Read model name from zoo model

* Logprobs passed through raw response for gold choices

Co-authored-by: Simran <emailsimran@gmail.com>
Co-authored-by: Dan Fu <danfu@cs.stanford.edu>
laurel/helm
Laurel Orr 2 years ago
parent e0a76d1f93
commit 5428afdc58

@ -7,5 +7,5 @@
[flake8]
exclude = .git
max-line-length = 88
ignore = E731, E402, W503, E203
ignore = E731, E402, W503, E203, PAI100, PAI101, PAI201, PAI202, PAI203
per-file-ignores = __init__.py:F401, version.py:D100

@ -20,6 +20,12 @@ PORT = int(os.environ.get("FLASK_PORT", 5000))
MODEL_CONSTRUCTORS = {
"huggingface": HuggingFaceModel,
}
try:
from manifest.api.models.zoo import ZooModel
MODEL_CONSTRUCTORS["zoo"] = ZooModel # type: ignore
except ImportError:
logger.warning("Zoo model not available.")
def parse_args() -> argparse.Namespace:
@ -31,14 +37,19 @@ def parse_args() -> argparse.Namespace:
type=str,
required=True,
help="Model type used for finding constructor.",
choices=["huggingface"],
choices=["huggingface", "zoo"],
)
parser.add_argument(
"--model_name",
"--model_name_or_path",
default=None,
type=str,
required=True,
help="Name of model. Used in initialize of model class.",
help="Name of model or path to model. Used in initialize of model class.",
)
parser.add_argument(
"--model_config",
default=None,
type=str,
help="Model config. Used in initialize of model class.",
)
parser.add_argument(
"--cache_dir", default=None, type=str, help="Cache directory for models."
@ -79,7 +90,10 @@ def main() -> None:
"""Run main."""
kwargs = parse_args()
model_type = kwargs.model_type
model_name = kwargs.model_name
model_name_or_path = kwargs.model_name_or_path
model_config = kwargs.model_config
if not model_name_or_path and not model_config:
raise ValueError("Must provide model_name_or_path or model_config.")
use_accelerate = kwargs.use_accelerate_multigpu
if use_accelerate:
logger.info("Using accelerate. Overridding --device argument.")
@ -91,7 +105,8 @@ def main() -> None:
# Global model
global model
model = MODEL_CONSTRUCTORS[model_type](
model_name,
model_name_or_path,
model_config=model_config,
cache_dir=kwargs.cache_dir,
device=kwargs.device,
use_accelerate=use_accelerate,
@ -112,9 +127,10 @@ def completions() -> Dict:
if not isinstance(prompt, str):
raise ValueError("Prompt must be a str")
results = []
results_text = []
for generations in model.generate(prompt, **generation_args):
results.append(generations)
results_text.append(generations)
results = [{"text": r, "text_logprob": None} for r in results_text]
# transform the result into the openai format
return OpenAIResponse(results).__dict__()
@ -134,9 +150,10 @@ def choice_logits() -> Dict:
if not isinstance(gold_choices, list):
raise ValueError("Gold choices must be a list of string choices")
result = model.logits_scoring(prompt, gold_choices, **generation_args)
result, score = model.logits_scoring(prompt, gold_choices, **generation_args)
results = [{"text": result, "text_logprob": score}]
# transform the result into the openai format
return OpenAIResponse([result]).__dict__()
return OpenAIResponse(results).__dict__()
@app.route("/params", methods=["POST"])

@ -1,10 +1,11 @@
"""Huggingface model."""
import json
from pathlib import Path
from typing import Any, Dict, List
from typing import Any, Dict, List, Tuple
import torch
from transformers import (
AutoModelForCausalLM,
AutoModelForSeq2SeqLM,
AutoTokenizer,
BloomForCausalLM,
@ -25,13 +26,18 @@ MODEL_REGISTRY = {
"EleutherAI/gpt-neo-2.7B": GPTNeoForCausalLM,
"EleutherAI/gpt-j-6B": GPTJForCausalLM,
"EleutherAI/gpt-neox-20b": GPTNeoXForCausalLM,
"facebook/opt-125m": OPTForCausalLM,
"facebook/opt-1.3b": OPTForCausalLM,
"facebook/opt-2.7b": OPTForCausalLM,
"facebook/opt-6.7b": OPTForCausalLM,
"facebook/opt-13b": OPTForCausalLM,
"facebook/opt-30b": OPTForCausalLM,
"gpt2": GPT2LMHeadModel,
"bigscience/bloom-560m": BloomForCausalLM,
"bigscience/bloom-1b7": BloomForCausalLM,
"bigscience/bloom-3b": BloomForCausalLM,
"bigscience/bloom-7b1": BloomForCausalLM,
"bigscience/bloom": AutoModelForCausalLM,
"bigscience/T0pp": AutoModelForSeq2SeqLM,
"bigscience/T0_3B": AutoModelForSeq2SeqLM,
"google/t5-xl-lm-adapt": AutoModelForSeq2SeqLM,
@ -117,7 +123,8 @@ class HuggingFaceModel(Model):
def __init__(
self,
model_name: str,
model_name_or_path: str,
model_config: str,
cache_dir: str,
device: int,
use_accelerate: bool,
@ -131,7 +138,8 @@ class HuggingFaceModel(Model):
All arguments will be passed in the request from Manifest.
Args:
model_name: model name string.
model_name_or_path: model name or local path string.
model_config: model config string.
cache_dir: cache directory for model.
device: device to use for model.
use_accelerate: whether to use accelerate for multi-gpu inference.
@ -142,32 +150,43 @@ class HuggingFaceModel(Model):
if use_accelerate and use_parallelize:
raise ValueError("Cannot use both accelerate and parallelize")
# Check if providing path
self.model_path = model_name
self.model_path = model_name_or_path
if Path(self.model_path).exists() and Path(self.model_path).is_dir():
# Try to find config
if (Path(self.model_path) / "config.json").exists():
config = json.load(open(Path(self.model_path) / "config.json"))
model_name = config["_name_or_path"]
self.model_name = model_name
model_name_or_path = config["_name_or_path"]
self.model_name = model_name_or_path
print("Model Name:", self.model_name, "Model Path:", self.model_path)
try:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(
self.model_name, truncation_side="left"
)
except ValueError:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
tokenizer = AutoTokenizer.from_pretrained(
self.model_name, truncation_side="left", use_fast=False
)
dtype = torch.float16 if use_fp16 else "auto"
try:
# Try to explicitly find an fp16 copy (gpt-j-6B for example)
model = MODEL_REGISTRY[model_name].from_pretrained( # type: ignore
if self.model_name == "bigscience/bloom":
model = MODEL_REGISTRY[self.model_name].from_pretrained( # type: ignore
self.model_path,
cache_dir=cache_dir,
revision="float16",
torch_dtype=torch.float16,
)
except Exception:
model = MODEL_REGISTRY[model_name].from_pretrained( # type: ignore
self.model_path, cache_dir=cache_dir, torch_dtype=dtype
load_in_8bit=True,
device_map="auto",
)
else:
try:
# Try to explicitly find an fp16 copy (gpt-j-6B for example)
model = MODEL_REGISTRY[self.model_name].from_pretrained( # type: ignore
self.model_path,
cache_dir=cache_dir,
revision="float16",
torch_dtype=torch.float16,
)
except Exception:
model = MODEL_REGISTRY[self.model_name].from_pretrained( # type: ignore
self.model_path, cache_dir=cache_dir, torch_dtype=dtype
)
model.eval()
print(f"Loaded Model DType {model.dtype}")
@ -175,20 +194,21 @@ class HuggingFaceModel(Model):
if not self.is_encdec:
tokenizer.pad_token = tokenizer.eos_token
if use_accelerate:
self._dispatch_accelerate_model(model, perc_max_gpu_mem_red)
device = 0
elif use_parallelize:
model.parallelize()
device = 0
else:
if device > -1:
torch_device = (
torch.device("cpu")
if (device == -1 or not torch.cuda.is_available())
else torch.device(f"cuda:{device}")
)
model = model.to(torch_device) # type: ignore
if self.model_name != "bigscience/bloom":
if use_accelerate:
self._dispatch_accelerate_model(model, perc_max_gpu_mem_red)
device = 0
elif use_parallelize:
model.parallelize()
device = 0
else:
if device > -1:
torch_device = (
torch.device("cpu")
if (device == -1 or not torch.cuda.is_available())
else torch.device(f"cuda:{device}")
)
model = model.to(torch_device) # type: ignore
self.pipeline = Pipeline( # type: ignore
model=model, tokenizer=tokenizer, device=device
)
@ -258,6 +278,7 @@ class HuggingFaceModel(Model):
dispatch_model(model, device_map=device_map)
return
@torch.no_grad()
def generate(self, prompt: str, **kwargs: Any) -> List[str]:
"""
Generate the prompt from model.
@ -303,9 +324,10 @@ class HuggingFaceModel(Model):
final_results = [r["generated_text"][start_idx:] for r in result]
return final_results
@torch.no_grad()
def logits_scoring(
self, prompt: str, gold_choices: List[str], **kwargs: Any
) -> str:
) -> Tuple[str, float]:
"""
Given the prompt and gold choices, choose the best choice with max logits.
@ -461,4 +483,4 @@ class HuggingFaceModel(Model):
if not self.is_encdec:
seq_log_prob = seq_log_prob * (1 / (seq_token_log_probs != 0).sum(dim=-1))
prediction = seq_log_prob.argmax(dim=-1).item()
return gold_choices[int(prediction)]
return gold_choices[int(prediction)], seq_log_prob[int(prediction)].item()
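Since logits_scoring now returns a (choice, score) tuple, callers unpack two values instead of receiving a bare string. A hedged sketch, assuming `model` is an already-constructed HuggingFaceModel and the prompt/choices are placeholders:

prompt = "Review: the movie was great. Sentiment:"
gold_choices = [" positive", " negative"]
choice, score = model.logits_scoring(prompt, gold_choices)
# `choice` is the argmax gold choice; `score` is its sequence log-probability
# (length-normalized for decoder-only models, per the lines above) as a float.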

@ -1,20 +1,37 @@
"""Model class."""
from abc import ABC, abstractmethod
from typing import Any, Dict, List
from typing import Any, Dict, List, Tuple
class Model(ABC):
"""Model class."""
@abstractmethod
def __init__(self, model_name: str, **kwargs: Any):
def __init__(
self,
model_name_or_path: str,
model_config: str,
cache_dir: str,
device: int,
use_accelerate: bool,
use_parallelize: bool,
perc_max_gpu_mem_red: float,
use_fp16: bool,
):
"""
Initialize model.
kwargs are passed to model as default parameters.
All arguments will be passed in the request from Manifest.
Args:
model_name: model name string.
model_name_or_path: model name or local path string.
model_config: model config string.
cache_dir: cache directory for model.
device: device to use for model.
use_accelerate: whether to use accelerate for multi-gpu inference.
use_parallelize: use HF default parallelize
perc_max_gpu_mem_red: percent max memory reduction in accelerate
use_fp16: use fp16 for model weights.
"""
raise NotImplementedError()
@ -37,3 +54,19 @@ class Model(ABC):
list of generated text (list of length 1 for 1 generation).
"""
raise NotImplementedError()
@abstractmethod
def logits_scoring(
self, prompt: str, gold_choices: List[str], **kwargs: Any
) -> Tuple[str, float]:
"""
Given the prompt and gold choices, choose the best choice with max logits.
Args:
prompt: prompt to generate from.
gold_choices: list of choices to choose from.
Returns:
the returned gold choice and the score.
"""
raise NotImplementedError()

@ -0,0 +1,94 @@
"""Zoo model."""
import os
import sys
from typing import Any, Dict, List, Tuple
from manifest.api.models.model import Model
ZOO_PATH = os.environ.get("ZOO_PATH", None)
if not ZOO_PATH:
raise ImportError("ZOO_PATH environment variable not set.")
sys.path.append(ZOO_PATH)
from src.models.s4_seq import S4LMManifest # type: ignore
class ZooModel(Model):
"""Zoo model."""
def __init__(
self,
model_name_or_path: str,
model_config: str,
cache_dir: str,
device: int,
use_accelerate: bool,
use_parallelize: bool,
perc_max_gpu_mem_red: float,
use_fp16: bool,
):
"""
Initialize model.
All arguments will be passed in the request from Manifest.
Args:
model_name_or_path: model name or path to model weights.
model_config: model config path.
cache_dir: cache directory for model.
device: device to use for model.
use_accelerate: whether to use accelerate for multi-gpu inference.
use_parallelize: use HF default parallelize
perc_max_gpu_mem_red: percent max memory reduction in accelerate
use_fp16: use fp16 for model weights.
"""
# Check if providing path
self.model_path = model_name_or_path
self.model_config = model_config
if not self.model_config:
raise ValueError("Must provide model config.")
self.model = S4LMManifest(
config_path=self.model_config,
weights_path=self.model_path,
)
# Can only load this after the model has been initialized
self.model_name = self.model.get_model_name()
def get_init_params(self) -> Dict:
"""Return init params to determine what model is being used."""
return {
"model_name": self.model_name,
"model_path": self.model_path,
"model_config": self.model_config,
}
def generate(self, prompt: str, **kwargs: Any) -> List[str]:
"""
Generate the prompt from model.
Outputs must be generated text, not including prompt.
Args:
prompt: prompt to generate from.
Returns:
list of generated text (list of length 1 for 1 generation).
"""
print(prompt)
final_results = self.model.generate(prompt, **kwargs)
return final_results
def logits_scoring(
self, prompt: str, gold_choices: List[str], **kwargs: Any
) -> Tuple[str, float]:
"""
Given the prompt and gold choices, choose the best choice with max logits.
Args:
prompt: prompt to generate from.
gold_choices: list of choices to choose from.
Returns:
the returned gold choice and the score.
"""
raise NotImplementedError()
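The zoo module imports S4LMManifest at load time, so ZOO_PATH has to be set before the import succeeds. A standalone construction sketch with hypothetical placeholder paths (not taken from the diff):

import os
os.environ["ZOO_PATH"] = "/path/to/zoo"  # must point at the zoo repo before the import below
from manifest.api.models.zoo import ZooModel

model = ZooModel(
    model_name_or_path="/path/to/checkpoint.pt",  # placeholder weights path
    model_config="/path/to/config.yaml",          # required; an empty config raises ValueError
    cache_dir=None,
    device=0,
    use_accelerate=False,
    use_parallelize=False,
    perc_max_gpu_mem_red=1.0,
    use_fp16=False,
)
print(model.get_init_params())  # {"model_name": ..., "model_path": ..., "model_config": ...}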

@ -2,13 +2,13 @@
import time
import uuid
from typing import Any, Dict
from typing import Any, Dict, List
class OpenAIResponse:
"""OpenAI response."""
def __init__(self, results: list) -> None:
def __init__(self, results: List[Dict[str, Any]]) -> None:
"""Initialize response."""
self.results = results
self.response_id = str(uuid.uuid4())
@ -23,7 +23,8 @@ class OpenAIResponse:
"model": "flask_model",
"choices": [
{
"text": result,
"text": result["text"],
"text_logprob": result["text_logprob"],
# TODO: Add in more metadata for HF models
# "logprobs": {
# "tokens": result["tokens"],

@ -144,3 +144,19 @@ class AI21Client(Client):
return self.format_response(res.json())
return _run_completion, request_params
def get_choice_logit_request(
self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
) -> Tuple[Callable[[], Dict], Dict]:
"""
Get request string function for choosing max choices.
Args:
query: query string.
gold_choices: choices for model to choose from via max logits.
Returns:
request function that takes no input.
request parameters as dict.
"""
raise NotImplementedError("AI21 does not support choice logit request.")

@ -81,3 +81,20 @@ class Client(ABC):
request parameters as dict.
"""
raise NotImplementedError()
@abstractmethod
def get_choice_logit_request(
self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
) -> Tuple[Callable[[], Dict], Dict]:
"""
Get request string function for choosing max choices.
Args:
query: query string.
gold_choices: choices for model to choose from via max logits.
Returns:
request function that takes no input.
request parameters as dict.
"""
raise NotImplementedError()

@ -149,3 +149,19 @@ class CRFMClient(Client):
return self.format_response(request_result)
return _run_completion, request_params
def get_choice_logit_request(
self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
) -> Tuple[Callable[[], Dict], Dict]:
"""
Get request string function for choosing max choices.
Args:
query: query string.
gold_choices: choices for model to choose from via max logits.
Returns:
request function that takes no input.
request parameters as dict.
"""
raise NotImplementedError("CRFM does not support choice logit request.")

@ -12,6 +12,8 @@ logger = logging.getLogger(__name__)
OPENAI_ENGINES = {
"text-davinci-002",
"text-davinci-001",
"davinci",
"text-curie-001",
"text-babbage-001",
"text-ada-001",
@ -116,3 +118,19 @@ class OpenAIClient(Client):
raise e
return _run_completion, request_params
def get_choice_logit_request(
self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
) -> Tuple[Callable[[], Dict], Dict]:
"""
Get request string function for choosing max choices.
Args:
query: query string.
gold_choices: choices for model to choose from via max logits.
Returns:
request function that takes no input.
request parameters as dict.
"""
raise NotImplementedError("OpenAI does not support choice logit request.")

@ -86,3 +86,19 @@ class OPTClient(Client):
return res.json()
return _run_completion, request_params
def get_choice_logit_request(
self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
) -> Tuple[Callable[[], Dict], Dict]:
"""
Get request string function for choosing max choices.
Args:
query: query string.
gold_choices: choices for model to choose from via max logits.
Returns:
request function that takes no input.
request parameters as dict.
"""
raise NotImplementedError("OPT does not support choice logit request.")

@ -0,0 +1,102 @@
"""Zoo client."""
import logging
from typing import Any, Callable, Dict, List, Optional, Tuple
import requests
from manifest.clients.client import Client
logger = logging.getLogger(__name__)
# User param -> (client param, default value)
ZOO_PARAMS: Dict[str, Tuple[str, str]] = {}
class ZooClient(Client):
"""Zoo client."""
def connect(
self,
connection_str: Optional[str] = None,
client_args: Dict[str, Any] = {},
) -> None:
"""
Connect to the model.
Args:
connection_str: connection string.
client_args: client arguments.
"""
self.host = connection_str.rstrip("/")
for key in ZOO_PARAMS:
setattr(self, key, client_args.pop(key, ZOO_PARAMS[key][1]))
self.model_params = self.get_model_params()
def close(self) -> None:
"""Close the client."""
pass
def get_model_params(self) -> Dict:
"""
Get model params.
By getting model params from the server, we can add them to the request
and make sure cache keys are unique to the model.
Returns:
model params.
"""
res = requests.post(self.host + "/params")
return res.json()
def get_model_inputs(self) -> List:
"""
Get allowable model inputs.
Returns:
model inputs.
"""
return list(ZOO_PARAMS.keys())
def get_request(
self, query: str, request_args: Dict[str, Any] = {}
) -> Tuple[Callable[[], Dict], Dict]:
"""
Get request string function.
Args:
query: query string.
Returns:
request function that takes no input.
request parameters as dict.
"""
request_params = {"prompt": query}
# Zoo is greedy and takes all params
# TODO: Once zoo is finalized, fix this
for key in list(request_args.keys()):
request_params[key] = request_args.pop(key, None)
request_params.update(self.model_params)
def _run_completion() -> Dict:
post_str = self.host + "/completions"
res = requests.post(post_str, json=request_params)
return res.json()
return _run_completion, request_params
def get_choice_logit_request(
self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
) -> Tuple[Callable[[], Dict], Dict]:
"""
Get request string function for choosing max choices.
Args:
query: query string.
gold_choices: choices for model to choose from via max logits.
Returns:
request function that takes no input.
request parameters as dict.
"""
raise NotImplementedError("Zoo does not support choice logit request.")

@ -12,6 +12,7 @@ from manifest.clients.dummy import DummyClient
from manifest.clients.huggingface import HuggingFaceClient
from manifest.clients.openai import OpenAIClient
from manifest.clients.opt import OPTClient
from manifest.clients.zoo import ZooClient
from manifest.prompt import Prompt
from manifest.response import Response
from manifest.session import Session
@ -25,6 +26,7 @@ CLIENT_CONSTRUCTORS = {
"huggingface": HuggingFaceClient,
"opt": OPTClient,
"dummy": DummyClient,
"zoo": ZooClient,
}
CACHE_CONSTRUCTORS = {
@ -83,12 +85,12 @@ class Manifest:
)
self.client_name = client_name
# Must pass kwargs as dict so the client "pop" methods remove used arguments
self.client = CLIENT_CONSTRUCTORS[client_name]( # type: ignore
client_connection, client_args=kwargs
)
self.cache = CACHE_CONSTRUCTORS[cache_name]( # type: ignore
cache_connection, cache_args=kwargs
)
self.client = CLIENT_CONSTRUCTORS[client_name]( # type: ignore
client_connection, client_args=kwargs
)
self.session = Session(session_id)
if len(kwargs) > 0:
raise ValueError(f"{list(kwargs.items())} arguments are not recognized.")

@ -44,6 +44,7 @@ REQUIRED = [
# What packages are optional?
EXTRAS = {
"dev": [
"autopep8>=1.6.0",
"black>=22.3.0",
"isort>=5.9.3",
"flake8>=4.0.0",
