diff --git a/hotpotqa.ipynb b/hotpotqa.ipynb index 0bd48a1..cbf4782 100644 --- a/hotpotqa.ipynb +++ b/hotpotqa.ipynb @@ -9,18 +9,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "import os\n", - "import openai\n", - " \n", - "openai.api_key = os.environ[\"OPENAI_API_KEY\"]\n", + "from openai import OpenAI\n", + "\n", + "client = OpenAI()\n", + "\n", "\n", "def llm(prompt, stop=[\"\\n\"]):\n", - " response = openai.Completion.create(\n", - " model=\"text-davinci-002\",\n", + " response = client.completions.create(\n", + " model=\"gpt-3.5-turbo-instruct\",\n", " prompt=prompt,\n", " temperature=0,\n", " max_tokens=100,\n", @@ -29,12 +30,52 @@ " presence_penalty=0.0,\n", " stop=stop\n", " )\n", - " return response[\"choices\"][0][\"text\"]" + " print(response)\n", + " return response.choices[0].text" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting gym\n", + " Downloading gym-0.26.2.tar.gz (721 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m721.7/721.7 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25hCollecting numpy>=1.18.0\n", + " Using cached numpy-1.26.3-cp310-cp310-macosx_11_0_arm64.whl (14.0 MB)\n", + "Collecting cloudpickle>=1.2.0\n", + " Downloading cloudpickle-3.0.0-py3-none-any.whl (20 kB)\n", + "Collecting gym-notices>=0.0.4\n", + " Downloading gym_notices-0.0.8-py3-none-any.whl (3.0 kB)\n", + "Building wheels for collected packages: gym\n", + " Building wheel for gym (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for gym: filename=gym-0.26.2-py3-none-any.whl size=827619 sha256=a6200f73121b6848d45f66df86dd465e296248aedfc07271e74c3ac986446103\n", + " Stored in directory: /Users/satoshi/Library/Caches/pip/wheels/b9/22/6d/3e7b32d98451b4cd9d12417052affbeeeea012955d437da1da\n", + "Successfully built gym\n", + "Installing collected packages: gym-notices, numpy, cloudpickle, gym\n", + "Successfully installed cloudpickle-3.0.0 gym-0.26.2 gym-notices-0.0.8 numpy-1.26.3\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install gym" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -119,7 +160,200 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3687 Question: What movie did actress Irene Jacob complete before the American action crime thriller film directed by Stuart Bird?\n", + "Completion(id='cmpl-8hd82Rcy020ZOdl8YF23RWpJ7VgbO', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=\" I need to search Irene Jacob and Stuart Bird, find the movie Irene Jacob completed before Stuart Bird's film.\\nAction 1: Search[Irene Jacob]\")], created=1705409206, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=31, prompt_tokens=1607, total_tokens=1638))\n", + "Thought 1: I need to search Irene Jacob and Stuart Bird, find the movie Irene Jacob completed before Stuart Bird's film.\n", + "Action 1: Search[Irene Jacob]\n", + "Observation 1: Irène Marie Jacob (born 15 July 1966) is a French-Swiss actress known for her work with Polish film director Krzysztof Kieślowski.[2] She won the 1991 Cannes Film Festival Award for Best Actress for the Kieślowski film The Double Life of Veronique, and was nominated for the BAFTA Award for Best Actress in a Leading Role for her 1994 film Three Colours: Red. Her other film appearances include The Secret Garden (1993), Beyond the Clouds (1995), U.S. Marshals (1998), and Eternity (2016).. Irène Jacob was born in Suresnes, Hauts-de-Seine, a western suburb of Paris.[3] The youngest child with three older brothers, she was raised in a highly educated and intellectual family and environment: her father, Maurice Jacob, was a physicist; her mother, a psychologist; one brother, Francis Jacob, a musician; and her other two brothers, scientists.[4][5] In 1969, at the age of three, Irène moved with her family to Geneva, Switzerland, where she became interested in the arts.. My family was very shy with feelings and never spoke about them, but we evolved a little bit.\n", + "\n", + "Completion(id='cmpl-8hd84tX7QKz0rzDcr0hwt9jowNbwy', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=\" The paragraph mentions several films, but not Stuart Bird's film. I need to look up Stuart Bird.\\nAction 2: Lookup[Stuart Bird]\")], created=1705409208, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=31, prompt_tokens=1913, total_tokens=1944))\n", + "Thought 2: The paragraph mentions several films, but not Stuart Bird's film. I need to look up Stuart Bird.\n", + "Action 2: Lookup[Stuart Bird]\n", + "Observation 2: No more results.\n", + "\n", + "\n", + "Completion(id='cmpl-8hd85okU6TUPrYY7DrYRTxyob3xax', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=\" I cannot find Stuart Bird's film. Maybe I can search for the movie Irene Jacob completed before Stuart Bird's film.\\nAction 3: Search[The Double Life of Veronique]\")], created=1705409209, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=38, prompt_tokens=1957, total_tokens=1995))\n", + "Thought 3: I cannot find Stuart Bird's film. Maybe I can search for the movie Irene Jacob completed before Stuart Bird's film.\n", + "Action 3: Search[The Double Life of Veronique]\n", + "Observation 3: The Double Life of Veronique (French: La double vie de Véronique, Polish: Podwójne życie Weroniki) is a 1991 drama film directed by Krzysztof Kieślowski and starring Irène Jacob. Written by Kieślowski and Krzysztof Piesiewicz, the film explores the themes of identity, love, and human intuition through the characters of Weronika, a Polish choir soprano, and her double, Véronique, a French music teacher. Despite not knowing each other, the two women share a mysterious and emotional bond that transcends language and geography.. The Double Life of Véronique was Kieślowski's first film to be produced partly outside his native Poland.[1] It won the Prize of the Ecumenical Jury and the FIPRESCI Prize at the 1991 Cannes Film Festival, as well as the Best Actress Award for Jacob.[2] Although selected as the Polish entry for the Best Foreign Language Film at the 64th Academy Awards, it was not accepted as a nominee.[3]. In 1968, a Polish girl glimpses the winter stars, while in France, another girl witnesses the first spring leaf.\n", + "\n", + "Completion(id='cmpl-8hd87Gf3dJCX7PkQKj8MIIGp0F1ZW', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=\" The Double Life of Veronique was completed before Stuart Bird's film. So the answer is The Double Life of Veronique.\\nAction 4: Finish[The Double Life of Veronique]\")], created=1705409211, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=41, prompt_tokens=2263, total_tokens=2304))\n", + "Thought 4: The Double Life of Veronique was completed before Stuart Bird's film. So the answer is The Double Life of Veronique.\n", + "Action 4: Finish[The Double Life of Veronique]\n", + "Observation 4: Episode finished, reward = 0\n", + "\n", + "\n", + "{'steps': 4, 'answer': 'The Double Life of Veronique', 'gt_answer': 'Beyond the Clouds', 'question_idx': 3687, 'reward': False, 'em': False, 'f1': 0} \n", + "\n", + "0 1 0.0 6.15775990486145\n", + "-----------\n", + "\n", + "6238 Question: Who created the show with Wendy Schaal doing the voice of Francine?\n", + "Completion(id='cmpl-8hd88qwkfKBCQXZi4hIWo36CoytQE', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' I need to search Wendy Schaal and Francine, find the show they both worked on, then find who created the show.\\nAction 1: Search[Wendy Schaal]')], created=1705409212, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=37, prompt_tokens=1603, total_tokens=1640))\n", + "Thought 1: I need to search Wendy Schaal and Francine, find the show they both worked on, then find who created the show.\n", + "Action 1: Search[Wendy Schaal]\n", + "Observation 1: Wendy Schaal (born July 2, 1954)[1] is an American actress known for her work in Joe Dante films such as Innerspace, The 'Burbs, and Small Soldiers. Her other film credits include starring in films such as Where the Boys Are '84, Creature, Going Under, and Munchies. She had many guest roles in television series of the mid-1980s, most notably as Marilyn Kelsy in Airwolf. Since 2005 she has primarily worked in voice acting, most notably voicing Francine Smith in the animated comedy television series American Dad!. Schaal was born in Chicago, Illinois, the daughter of Lois Schaal (née Treacy) and actor Richard Schaal.[1] Schaal's father was married to actress Valerie Harper from 1964 to 1978, during which time Harper was her stepmother.[2] From birth until she was five, Schaal lived with her parents in Crete, Illinois, at which time she moved with her mother to Newport Beach, California after her parents divorced.[3] Schaal studied acting with Viola Spolin in Chicago when she was nine years old, later moving to Wisconsin and then California when she was 11.\n", + "\n", + "Completion(id='cmpl-8hd8A8WQW56krWWXReCdQ58TStZTG', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' Wendy Schaal is an actress known for her work in Joe Dante films and voice acting. I need to search Francine next and find the show they both worked on.\\nAction 2: Search[Francine]')], created=1705409214, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=43, prompt_tokens=1902, total_tokens=1945))\n", + "Thought 2: Wendy Schaal is an actress known for her work in Joe Dante films and voice acting. I need to search Francine next and find the show they both worked on.\n", + "Action 2: Search[Francine]\n", + "Observation 2: Francine is a female given name. The name is of French origin. The name Francine was most popular in France itself during the 1940s (Besnard & Desplanques 2003), and was well used in the United States during the 1940s and 1950s (Evans 2006). Short forms are Frannie, Frans and Fran.. Main pageContentsCurrent eventsRandom articleAbout WikipediaContact usDonate.\n", + "\n", + "Completion(id='cmpl-8hd8CPED50UDpMliyfOD2UzjMgOMT', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' Francine is a female given name, so it is not the show they both worked on. I need to look up Francine Smith, the character Wendy Schaal voices.\\nAction 3: Search[Francine Smith]')], created=1705409216, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=45, prompt_tokens=2046, total_tokens=2091))\n", + "Thought 3: Francine is a female given name, so it is not the show they both worked on. I need to look up Francine Smith, the character Wendy Schaal voices.\n", + "Action 3: Search[Francine Smith]\n", + "Observation 3: Francine Lee Smith (formerly Ling; née Dawson) is a fictional character on the American animated sitcom American Dad! [1] She is the wife of the main character, Stan Smith and the mother of Hayley and Steve Smith. Francine is voiced by Wendy Schaal.. Early in the series, Francine is often seen as the voice of reason in her dysfunctional family, adopting the stereotypical role of the long-suffering wife or doting mother. However, as the show progresses, her backstory develops with allusions to unconventional and surreal activities, including working for strip clubs, underground fighting rings, cult memberships, multiple murders, and substance abuse.. In the series, Francine was born to a wealthy couple from South Carolina, Francine's birth parents – Nicholas and Cassandra Dawson – gave her up when she was a baby so they could be upgraded to first class on a flight in \"Big Trouble in Little Langley\".\n", + "\n", + "Completion(id='cmpl-8hd8FN0zVgyZiZKxolWSNM9dSL3aU', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' Francine Smith is a character on American Dad! and Wendy Schaal voices her. I need to find who created the show.\\nAction 4: Finish[Seth MacFarlane]')], created=1705409219, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=37, prompt_tokens=2288, total_tokens=2325))\n", + "Thought 4: Francine Smith is a character on American Dad! and Wendy Schaal voices her. I need to find who created the show.\n", + "Action 4: Finish[Seth MacFarlane]\n", + "Observation 4: Episode finished, reward = 1\n", + "\n", + "\n", + "{'steps': 4, 'answer': 'Seth MacFarlane', 'gt_answer': 'Seth MacFarlane', 'question_idx': 6238, 'reward': True, 'em': True, 'f1': 1.0} \n", + "\n", + "1 2 0.5 7.033779978752136\n", + "-----------\n", + "\n", + "5388 Question: What objects were carried into battle by these naval ships for qhich the QF 6-pounder Hotchkiss were introduced to defend against?\n", + "Completion(id='cmpl-8hd8GSn918ccASqeYyEb2gCWSlE0s', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' I need to search QF 6-pounder Hotchkiss, find the naval ships it was introduced to defend against, then find the objects carried into battle by those ships.\\nAction 1: Search[QF 6-pounder Hotchkiss]')], created=1705409220, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=52, prompt_tokens=1618, total_tokens=1670))\n", + "Thought 1: I need to search QF 6-pounder Hotchkiss, find the naval ships it was introduced to defend against, then find the objects carried into battle by those ships.\n", + "Action 1: Search[QF 6-pounder Hotchkiss]\n", + "Observation 1: The Ordnance QF Hotchkiss 6 pounder gun Mk I and Mk II or QF 6 pounder 8 cwt were a family of long-lived light 57 mm naval guns introduced in 1885 to defend against new, small and fast vessels such as torpedo boats and later submarines. There were many variants produced, often under license which ranged in length from 40 to 58 calibers, but 40 caliber was the most common version.. 6-pounders were widely used by the navies of a number of nations and often used by both sides in a conflict. Due to advances in torpedo delivery and performance, 6-pounder guns were rapidly made obsolete and were replaced with larger guns aboard most larger warships. This led to their being used ashore during World War I as coastal defense guns, the first tank guns and as anti-aircraft guns, whether on improvised or specialized HA/LA mounts.\n", + "\n", + "Completion(id='cmpl-8hd8Ircs1kfqW9UpIp8wzjGCW7rxn', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' The QF 6-pounder Hotchkiss was introduced to defend against new, small and fast vessels such as torpedo boats and later submarines. I need to find the objects carried into battle by these naval ships.\\nAction 2: Lookup[objects carried into battle]')], created=1705409222, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=54, prompt_tokens=1870, total_tokens=1924))\n", + "Thought 2: The QF 6-pounder Hotchkiss was introduced to defend against new, small and fast vessels such as torpedo boats and later submarines. I need to find the objects carried into battle by these naval ships.\n", + "Action 2: Lookup[objects carried into battle]\n", + "Observation 2: No more results.\n", + "\n", + "\n", + "Completion(id='cmpl-8hd8JjpmezmTiLdZ4ylb07flTlD3Z', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' Since there are no results for objects carried into battle, I need to search for the specific naval ships that used the QF 6-pounder Hotchkiss.\\nAction 3: Search[naval ships using QF 6-pounder Hotchkiss]')], created=1705409223, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=53, prompt_tokens=1937, total_tokens=1990))\n", + "Thought 3: Since there are no results for objects carried into battle, I need to search for the specific naval ships that used the QF 6-pounder Hotchkiss.\n", + "Action 3: Search[naval ships using QF 6-pounder Hotchkiss]\n", + "Observation 3: Could not find naval ships using QF 6-pounder Hotchkiss. Similar: ['QF 6-pounder Hotchkiss', 'QF 3-pounder Hotchkiss', 'QF 6-pounder Nordenfelt', 'QF 3-pounder Vickers', 'QF 2-pounder naval gun'].\n", + "\n", + "Completion(id='cmpl-8hd8MaA6ba63DsN7I051EY8swyhLO', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' Since there are no results for naval ships using QF 6-pounder Hotchkiss, I need to search for the specific naval ships that used the QF 6-pounder Hotchkiss.\\nAction 4: Search[naval ships using QF 6-pounder Hotchkiss]')], created=1705409226, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=61, prompt_tokens=2071, total_tokens=2132))\n", + "Thought 4: Since there are no results for naval ships using QF 6-pounder Hotchkiss, I need to search for the specific naval ships that used the QF 6-pounder Hotchkiss.\n", + "Action 4: Search[naval ships using QF 6-pounder Hotchkiss]\n", + "Observation 4: Could not find naval ships using QF 6-pounder Hotchkiss. Similar: ['QF 6-pounder Hotchkiss', 'QF 3-pounder Hotchkiss', 'QF 6-pounder Nordenfelt', 'QF 3-pounder Vickers', 'QF 2-pounder naval gun'].\n", + "\n", + "Completion(id='cmpl-8hd8PiPEbKgZcEisX4kOwgdd3g6eq', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' Since there are no results for naval ships using QF 6-pounder Hotchkiss, I need to search for the specific naval ships that used the QF 6-pounder Hotchkiss.\\nAction 5: Search[naval ships using QF 6-pounder Hotchkiss]')], created=1705409229, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=61, prompt_tokens=2213, total_tokens=2274))\n", + "Thought 5: Since there are no results for naval ships using QF 6-pounder Hotchkiss, I need to search for the specific naval ships that used the QF 6-pounder Hotchkiss.\n", + "Action 5: Search[naval ships using QF 6-pounder Hotchkiss]\n", + "Observation 5: Could not find naval ships using QF 6-pounder Hotchkiss. Similar: ['QF 6-pounder Hotchkiss', 'QF 3-pounder Hotchkiss', 'QF 6-pounder Nordenfelt', 'QF 3-pounder Vickers', 'QF 2-pounder naval gun'].\n", + "\n", + "Completion(id='cmpl-8hd8SQH89VV4CGts5fwUwYtrx49Yb', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' Since there are no results for naval ships using QF 6-pounder Hotchkiss, I need to search for the specific naval ships that used the QF 6-pounder Hotchkiss.\\nAction 6: Search[naval ships using QF 6-pounder Hotchkiss]')], created=1705409232, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=61, prompt_tokens=2355, total_tokens=2416))\n", + "Thought 6: Since there are no results for naval ships using QF 6-pounder Hotchkiss, I need to search for the specific naval ships that used the QF 6-pounder Hotchkiss.\n", + "Action 6: Search[naval ships using QF 6-pounder Hotchkiss]\n", + "Observation 6: Could not find naval ships using QF 6-pounder Hotchkiss. Similar: ['QF 6-pounder Hotchkiss', 'QF 3-pounder Hotchkiss', 'QF 6-pounder Nordenfelt', 'QF 3-pounder Vickers', 'QF 2-pounder naval gun'].\n", + "\n", + "Completion(id='cmpl-8hd8UybedvjJ9EsnDaipPgBhhsYp0', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' Since there are no results for naval ships using QF 6-pounder Hotchkiss, I need to search for the specific naval ships that used the QF 6-pounder Hotchkiss.\\nAction 7: Search[naval ships using QF 6-pounder Hotchkiss]')], created=1705409234, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=61, prompt_tokens=2497, total_tokens=2558))\n", + "Thought 7: Since there are no results for naval ships using QF 6-pounder Hotchkiss, I need to search for the specific naval ships that used the QF 6-pounder Hotchkiss.\n", + "Action 7: Search[naval ships using QF 6-pounder Hotchkiss]\n", + "Observation 7: Could not find naval ships using QF 6-pounder Hotchkiss. Similar: ['QF 6-pounder Hotchkiss', 'QF 3-pounder Hotchkiss', 'QF 6-pounder Nordenfelt', 'QF 3-pounder Vickers', 'QF 2-pounder naval gun'].\n", + "\n", + "{'steps': 8, 'answer': '', 'gt_answer': 'torpedoes', 'question_idx': 5388, 'reward': False, 'em': False, 'f1': 0} \n", + "\n", + "1 3 0.3333333333333333 10.564918677012125\n", + "-----------\n", + "\n", + "3522 Question: Who is older Danny Green or James Worthy?\n", + "Completion(id='cmpl-8hd8Xus3CzSkP8F1z8cfhp0imTVoB', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' I need to search Danny Green and James Worthy, find their ages, then find who is older.\\nAction 1: Search[Danny Green]')], created=1705409237, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=30, prompt_tokens=1598, total_tokens=1628))\n", + "Thought 1: I need to search Danny Green and James Worthy, find their ages, then find who is older.\n", + "Action 1: Search[Danny Green]\n", + "Observation 1: Could not find [Danny Green]. Similar: ['Danny Green (basketball)', 'Danny Green (boxer)', 'Danny Green (actor)', 'Daniel Green', 'List of NBA players with most championships'].\n", + "\n", + "Completion(id='cmpl-8hd8bFlwDOesS0xid7uApZsNQYjGV', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' To find their ages, I can search Danny Green (basketball) and James Worthy.\\nAction 2: Search[Danny Green (basketball)]')], created=1705409241, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=31, prompt_tokens=1680, total_tokens=1711))\n", + "Thought 2: To find their ages, I can search Danny Green (basketball) and James Worthy.\n", + "Action 2: Search[Danny Green (basketball)]\n", + "Observation 2: Daniel Richard Green (born June 22, 1987) is an American professional basketball player who last played for the Philadelphia 76ers of the National Basketball Association (NBA). In his NBA career, Green has played for six teams. As of 2020, Green is one of just four players in history to have won NBA championships with three different teams; he won titles with the San Antonio Spurs in 2014, the Toronto Raptors in 2019, and the Los Angeles Lakers in 2020.. Green spent his college basketball career at the University of North Carolina (UNC) from 2005 to 2009. He played in more games (145) and had more wins (123) than any Tar Heel before him.\n", + "\n", + "Completion(id='cmpl-8hd8eF6ZCDmlu38WI4raQ1fuLnRyS', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=\" Danny Green (basketball) was born on June 22, 1987. I need to find James Worthy's age next.\\nAction 3: Search[James Worthy]\")], created=1705409244, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=38, prompt_tokens=1872, total_tokens=1910))\n", + "Thought 3: Danny Green (basketball) was born on June 22, 1987. I need to find James Worthy's age next.\n", + "Action 3: Search[James Worthy]\n", + "Observation 3: James Ager Worthy (born February 27, 1961) is an American former professional basketball player.[1] Nicknamed \"Big Game James\", he played his entire professional career with the Los Angeles Lakers in the National Basketball Association (NBA). Worthy was a seven-time NBA All-Star, a two-time All-NBA Team member who won three NBA championships and was voted the NBA Finals MVP in 1988. He was named to both the NBA's 50th and 75th anniversary teams.. A standout college basketball player for the North Carolina Tar Heels, the small forward was a consensus first-team All-American and shared national player of the year honors en route to leading his team to the 1982 NCAA championship. Named the tournament's Most Outstanding Player, he was selected by the Lakers with the first overall pick of the 1982 NBA draft..\n", + "\n", + "Completion(id='cmpl-8hd8g7wCoPDkIoaBdiBKO1kuHZhwY', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' James Worthy was born on February 27, 1961. Danny Green (basketball) (1987) < James Worthy (1961), so James Worthy is older.\\nAction 4: Finish[James Worthy]')], created=1705409246, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=49, prompt_tokens=2096, total_tokens=2145))\n", + "Thought 4: James Worthy was born on February 27, 1961. Danny Green (basketball) (1987) < James Worthy (1961), so James Worthy is older.\n", + "Action 4: Finish[James Worthy]\n", + "Observation 4: Episode finished, reward = 1\n", + "\n", + "\n", + "{'steps': 4, 'answer': 'James Worthy', 'gt_answer': 'James Worthy', 'question_idx': 3522, 'reward': True, 'em': True, 'f1': 1.0} \n", + "\n", + "2 4 0.5 10.402567744255066\n", + "-----------\n", + "\n", + "3824 Question: Author David Chanoff has collaborated with a U.S. Navy admiral who served as the ambassador to the United Kingdom under which President?\n", + "Completion(id='cmpl-8hd8hqejQ5Pi6KTNqfr7dAoVIdWSG', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' I need to search David Chanoff, find his collaborations, then find the U.S. Navy admiral he collaborated with.\\nAction 1: Search[David Chanoff]')], created=1705409247, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=35, prompt_tokens=1615, total_tokens=1650))\n", + "Thought 1: I need to search David Chanoff, find his collaborations, then find the U.S. Navy admiral he collaborated with.\n", + "Action 1: Search[David Chanoff]\n", + "Observation 1: David Chanoff is a noted author of non-fiction work. His work has typically involved collaborations with the principal protagonist of the work concerned. His collaborators have included; Augustus A. White, Joycelyn Elders, Đoàn Văn Toại, William J. Crowe, Ariel Sharon, Kenneth Good and Felix Zandman.\n", + "\n", + "Completion(id='cmpl-8hd8jgrW4JpdT9m5ynmgOyqoCl5Xr', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' David Chanoff has collaborated with a U.S. Navy admiral. I need to search U.S. Navy admiral next and find the admiral who served as the ambassador to the United Kingdom.\\nAction 2: Search[U.S. Navy admiral]')], created=1705409249, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=52, prompt_tokens=1729, total_tokens=1781))\n", + "Thought 2: David Chanoff has collaborated with a U.S. Navy admiral. I need to search U.S. Navy admiral next and find the admiral who served as the ambassador to the United Kingdom.\n", + "Action 2: Search[U.S. Navy admiral]\n", + "Observation 2: Could not find U.S. Navy admiral. Similar: ['Admiral of the Navy', 'Fleet admiral (United States)', 'Admiral (United States)', 'List of United States Navy four-star admirals', 'Admiral Strauss'].\n", + "\n", + "Completion(id='cmpl-8hd8mK1xL9nMZHHQHnXxbzysTC50A', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' I need to instead search U.S. Navy admiral who served as the ambassador to the United Kingdom.\\nAction 3: Search[U.S. Navy admiral who served as the ambassador to the United Kingdom]')], created=1705409252, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=42, prompt_tokens=1845, total_tokens=1887))\n", + "Thought 3: I need to instead search U.S. Navy admiral who served as the ambassador to the United Kingdom.\n", + "Action 3: Search[U.S. Navy admiral who served as the ambassador to the United Kingdom]\n", + "Observation 3: Could not find U.S. Navy admiral who served as the ambassador to the United Kingdom. Similar: ['List of United States Navy four-star admirals', 'Raymond A. Spruance (redirect from Vice Admiral Spruance)', 'Kenneth Braithwaite (category United States Navy rear admirals)', 'United States Secretary of the Navy', 'First Navy Jack'].\n", + "\n", + "Completion(id='cmpl-8hd8pA9K1nJxYTlzbNJfCDpEQhNjv', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' I need to instead search U.S. Navy admiral who served as the ambassador to the United Kingdom under which President.\\nAction 4: Search[U.S. Navy admiral who served as the ambassador to the United Kingdom under which President]')], created=1705409255, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=48, prompt_tokens=1975, total_tokens=2023))\n", + "Thought 4: I need to instead search U.S. Navy admiral who served as the ambassador to the United Kingdom under which President.\n", + "Action 4: Search[U.S. Navy admiral who served as the ambassador to the United Kingdom under which President]\n", + "Observation 4: Could not find U.S. Navy admiral who served as the ambassador to the United Kingdom under which President. Similar: ['William J. Crowe (category Ambassadors of the United States to the United Kingdom)', 'Raymond A. Spruance (redirect from Vice Admiral Spruance)', 'United States Secretary of the Navy', 'United Kingdom–United States relations', 'Franklin D. Roosevelt (redirect from 32nd U.S.A. President)'].\n", + "\n", + "Completion(id='cmpl-8hd8tnKXPKswcN8uC38pbeqdqrA4V', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=\" I need to instead search U.S. Navy admiral who served as the ambassador to the United Kingdom under which President. I can also try searching for David Chanoff's collaborations with U.S. Navy admirals.\\nAction 5: Search[David Chanoff's collaborations with U.S. Navy admirals]\")], created=1705409259, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=62, prompt_tokens=2128, total_tokens=2190))\n", + "Thought 5: I need to instead search U.S. Navy admiral who served as the ambassador to the United Kingdom under which President. I can also try searching for David Chanoff's collaborations with U.S. Navy admirals.\n", + "Action 5: Search[David Chanoff's collaborations with U.S. Navy admirals]\n", + "Observation 5: Could not find David Chanoff's collaborations with U.S. Navy admirals. Similar: ['Japanese war crimes (category Imperial Japanese Navy)'].\n", + "\n", + "Completion(id='cmpl-8hd8wWK3SRQoBRnKkTlmasLPx03Vk', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=\" I need to instead search David Chanoff's collaborations with U.S. Navy admirals who served as ambassadors to the United Kingdom.\\nAction 6: Search[David Chanoff's collaborations with U.S. Navy admirals who served as ambassadors to the United Kingdom]\")], created=1705409262, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=53, prompt_tokens=2229, total_tokens=2282))\n", + "Thought 6: I need to instead search David Chanoff's collaborations with U.S. Navy admirals who served as ambassadors to the United Kingdom.\n", + "Action 6: Search[David Chanoff's collaborations with U.S. Navy admirals who served as ambassadors to the United Kingdom]\n", + "Observation 6: Could not find David Chanoff's collaborations with U.S. Navy admirals who served as ambassadors to the United Kingdom. Similar: ['Japanese war crimes (redirect from Japanese war crimes in the Pacific)'].\n", + "\n", + "Completion(id='cmpl-8hd8yhVOK9CnoA1dLUkhXWh8tDkc7', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=\" I need to instead search David Chanoff's collaborations with U.S. Navy admirals who served as ambassadors to the United Kingdom under which President.\\nAction 7: Search[David Chanoff's collaborations with U.S. Navy admirals who served as ambassadors to the United Kingdom under which President]\")], created=1705409264, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=59, prompt_tokens=2333, total_tokens=2392))\n", + "Thought 7: I need to instead search David Chanoff's collaborations with U.S. Navy admirals who served as ambassadors to the United Kingdom under which President.\n", + "Action 7: Search[David Chanoff's collaborations with U.S. Navy admirals who served as ambassadors to the United Kingdom under which President]\n", + "Observation 7: Could not find David Chanoff's collaborations with U.S. Navy admirals who served as ambassadors to the United Kingdom under which President. Similar: ['Japanese war crimes (redirect from Japanese war crimes in the Pacific)'].\n", + "\n", + "{'steps': 8, 'answer': '', 'gt_answer': 'Bill Clinton', 'question_idx': 3824, 'reward': False, 'em': False, 'f1': 0} \n", + "\n", + "2 5 0.4 12.337999200820922\n", + "-----------\n", + "\n", + "2866 Question: What was the first year a scientific journal published by an organization located in the Bronx was published?\n", + "Completion(id='cmpl-8hd91FZOgGHfC72wxh7NsBxNrKKGA', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' I need to search scientific journals published by organizations located in the Bronx, find the first year they were published, then find the first year a scientific journal published by an organization located in the Bronx was published.\\nAction 1: Search[scientific journals published by organizations located in the Bronx]')], created=1705409267, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=57, prompt_tokens=1607, total_tokens=1664))\n", + "Thought 1: I need to search scientific journals published by organizations located in the Bronx, find the first year they were published, then find the first year a scientific journal published by an organization located in the Bronx was published.\n", + "Action 1: Search[scientific journals published by organizations located in the Bronx]\n", + "Observation 1: Could not find scientific journals published by organizations located in the Bronx. Similar: ['Bronx High School of Science', 'Bronx Zoo', 'Pelham Bay Park (redirect from The Bronx Victory Column & Memorial Grove)', 'New York Botanical Garden (redirect from Bronx Botanical Garden)', 'New York University (redirect from NYU in London)'].\n", + "\n", + "Completion(id='cmpl-8hd94XBrX4UlMSv30C4jXaiUUjqRs', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' I need to search for a specific scientific journal published by an organization located in the Bronx. Maybe I can search for \"Bronx scientific journal\".\\nAction 2: Search[Bronx scientific journal]')], created=1705409270, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=40, prompt_tokens=1745, total_tokens=1785))\n", + "Thought 2: I need to search for a specific scientific journal published by an organization located in the Bronx. Maybe I can search for \"Bronx scientific journal\".\n", + "Action 2: Search[Bronx scientific journal]\n", + "Observation 2: Could not find Bronx scientific journal. Similar: ['Bronx High School of Science', 'Bronx Zoo', 'Scientific racism', 'List of Bronx High School of Science alumni', 'List of people from the Bronx'].\n", + "\n", + "Completion(id='cmpl-8hd97CVgm6qgzMZ0GsIPKSFPrZjoo', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' I need to search for a specific scientific journal published by an organization located in the Bronx. Maybe I can search for \"Bronx scientific organization\".\\nAction 3: Search[Bronx scientific organization]')], created=1705409273, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=40, prompt_tokens=1840, total_tokens=1880))\n", + "Thought 3: I need to search for a specific scientific journal published by an organization located in the Bronx. Maybe I can search for \"Bronx scientific organization\".\n", + "Action 3: Search[Bronx scientific organization]\n", + "Observation 3: Could not find Bronx scientific organization. Similar: ['Bronx High School of Science', 'Scientific racism', 'Bronx Zoo', 'Wildlife Conservation Society (category Non-profit organizations based in the Bronx)', 'List of Bronx High School of Science alumni'].\n", + "\n", + "Completion(id='cmpl-8hd9AdbDBI3w4s9BtrwB0LvGcEhPw', choices=[CompletionChoice(finish_reason='stop', index=0, logprobs=None, text=' I need to search for a specific scientific journal published by an organization located in the Bronx. Maybe I can search for \"Bronx scientific organization journal\".\\nAction 4: Search[Bronx scientific organization journal]')], created=1705409276, model='gpt-3.5-turbo-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=42, prompt_tokens=1942, total_tokens=1984))\n", + "Thought 4: I need to search for a specific scientific journal published by an organization located in the Bronx. Maybe I can search for \"Bronx scientific organization journal\".\n", + "Action 4: Search[Bronx scientific organization journal]\n", + "Observation 4: Could not find Bronx scientific organization journal. Similar: ['Bronx High School of Science', 'List of Bronx High School of Science alumni', 'Bronx Zoo', 'Scientific racism', 'Science fair (redirect from Scientific fair)'].\n", + "\n" + ] + } + ], "source": [ "import random\n", "import time\n", @@ -137,11 +371,18 @@ " print('-----------')\n", " print()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -155,9 +396,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.10.13" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }