From 3d05c971f69c01ffa42c2f8691aeb229e253064e Mon Sep 17 00:00:00 2001
From: Reza Yazdani
Date: Tue, 30 May 2023 15:49:21 -0700
Subject: [PATCH] add falcon test

---
 .../text-generation/test-falcon.py | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 inference/huggingface/text-generation/test-falcon.py

diff --git a/inference/huggingface/text-generation/test-falcon.py b/inference/huggingface/text-generation/test-falcon.py
new file mode 100644
index 000000000..e3829fcea
--- /dev/null
+++ b/inference/huggingface/text-generation/test-falcon.py
@@ -0,0 +1,30 @@
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+import deepspeed
+from deepspeed.accelerator import get_accelerator
+
+model_name = "tiiuae/falcon-40b"
+
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+
+# Load the model in bfloat16, then shard it across 4 tensor-parallel ranks.
+# The script is expected to run under the DeepSpeed launcher with 4 processes.
+model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).bfloat16()
+model = deepspeed.init_inference(model, mp_size=4)
+
+input_prompt = [
+    "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Girafatron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:"
+]
+
+input_tokens = tokenizer(input_prompt, return_tensors="pt")
+
+# Move the input tensors to the current accelerator device.
+for t in input_tokens:
+    if torch.is_tensor(input_tokens[t]):
+        input_tokens[t] = input_tokens[t].to(get_accelerator().current_device_name())
+# Falcon's generate() does not accept token_type_ids, so drop them if present.
+input_tokens.pop('token_type_ids', None)
+
+sequences = model.generate(**input_tokens, min_length=200, max_length=300, do_sample=True)
+
+print(f"Result: {tokenizer.batch_decode(sequences, skip_special_tokens=True)[0]}")
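
Usage note (an assumption, not stated in the patch itself): because the script calls deepspeed.init_inference(model, mp_size=4), the model is sharded across four tensor-parallel ranks, so the test presumably needs to be started with four processes via the DeepSpeed launcher, e.g.:

    deepspeed --num_gpus 4 inference/huggingface/text-generation/test-falcon.py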