Skip to content

Commit 7ddcba1

Browse files
rachellim, todor-markov, BorisPower, davecummings
authored
Lots of CLI changes (#22)
* Add CLI option to download files (#34)
* Option to check if file has been uploaded in the past before uploading (#33)
  The check is done based on filename, file purpose and file size
* Add fine-tuning hparams directly into the fine-tunes CLI (#35)
* update fine_tunes cli use_packing argument (#38)
* A file verification and remediation tool. It applies the following validations:
  - prints the number of examples, and warns if it's lower than 100
  - ensures prompt and completion columns are present
  - optionally removes any additional columns
  - ensures all completions are non-empty
  - infers which type of fine-tuning the data is most likely in (classification, conditional generation and open-ended generation)
  - optionally removes duplicate rows
  - infers the existence of a common suffix, and if there is none, suggests one for classification and conditional generation
  - optionally prepends a space to each completion, to make tokenization better
  - optionally splits into training and validation set for the classification use case
  - optionally ensures there's an ending string for all completions
  - optionally lowercases completions or prompts if more than 1/3 of alphanumeric characters are upper case
  It interactively asks the user to accept or reject recommendations. If the user is happy, then it saves the modified output file as a jsonl, which is ready for being used in fine-tuning with the printed command.
* Completion: remove `timeout` from kwargs before passing to EngineAPI (#37)
* Version bump before pushing to external

Co-authored-by: Todor Markov <[email protected]>
Co-authored-by: Boris Power <[email protected]>
Co-authored-by: Dave Cummings <[email protected]>
1 parent 250c33d commit 7ddcba1

File tree

7 files changed

+973
-23
lines changed

7 files changed

+973
-23
lines changed

bin/openai

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
#!/usr/bin/env python
22
import argparse
3-
import json
43
import logging
5-
import os
64
import sys
75

86
import openai
97
from openai.cli import display_error
10-
from openai.cli import register as api_register
8+
from openai.cli import api_register, tools_register
119

1210
logger = logging.getLogger()
1311
formatter = logging.Formatter("[%(asctime)s] %(message)s")
@@ -40,9 +38,11 @@ def main():
4038
parser.set_defaults(func=help)
4139

4240
subparsers = parser.add_subparsers()
43-
sub = subparsers.add_parser("api", help="Direct API calls")
41+
sub_api = subparsers.add_parser("api", help="Direct API calls")
42+
sub_tools = subparsers.add_parser("tools", help="Client side tools for convenience")
4443

45-
api_register(sub)
44+
api_register(sub_api)
45+
tools_register(sub_tools)
4646

4747
args = parser.parse_args()
4848
if args.verbosity == 1:

openai/api_resources/completion.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def create(cls, *args, **kwargs):
1919
of valid parameters.
2020
"""
2121
start = time.time()
22-
timeout = kwargs.get("timeout", None)
22+
timeout = kwargs.pop("timeout", None)
2323
if kwargs.get("model", None) is None and kwargs.get("engine", None) is None:
2424
raise InvalidRequestError(
2525
"Must provide an 'engine' or 'model' parameter to create a Completion.",

openai/api_resources/file.py

+59
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
from __future__ import absolute_import, division, print_function
22

3+
import json
4+
import os
5+
36
import openai
47
from openai import api_requestor, util
58
from openai.api_resources.abstract import (
@@ -29,3 +32,59 @@ def create(
2932
return util.convert_to_openai_object(
3033
response, api_key, api_version, organization
3134
)
35+
36+
@classmethod
def download(
    cls, id, api_key=None, api_base=None, api_version=None, organization=None
):
    """Fetch the raw contents of an uploaded file.

    Issues a GET request to ``<class_url>/<id>/content`` and returns the
    raw response body unchanged.

    If the response status code is outside the 2xx range, the body is
    parsed as JSON and the error produced by the requestor's
    ``handle_error_response`` is raised instead.
    """
    # Prefer an explicit base, then the file-specific base, then the default.
    resolved_base = api_base or openai.file_api_base or openai.api_base
    client = api_requestor.APIRequestor(
        api_key,
        api_base=resolved_base,
        api_version=api_version,
        organization=organization,
    )
    content_url = f"{cls.class_url()}/{id}/content"
    body, status, headers, _, _ = client.request_raw("get", content_url)
    if 200 <= status < 300:
        return body
    raise client.handle_error_response(
        body, status, json.loads(body), headers, stream_error=False
    )
53+
54+
@classmethod
def find_matching_files(
    cls,
    api_key=None,
    api_base=None,
    api_version=None,
    organization=None,
    file=None,
    purpose=None,
):
    """Return already-uploaded files that look identical to ``file``.

    A listed file is considered a match when its ``purpose`` equals
    ``purpose``, its ``filename`` equals ``file.name`` and its ``bytes``
    equals the size of ``file`` (measured by seeking to the end).

    Side effect: when at least one listed file matches on purpose and
    filename, ``file`` is measured and then rewound to position 0.

    Raises ``openai.error.InvalidRequestError`` when ``file`` or
    ``purpose`` is ``None``.
    """
    if file is None:
        raise openai.error.InvalidRequestError(
            "'file' is a required property", "file"
        )
    if purpose is None:
        raise openai.error.InvalidRequestError(
            "'purpose' is a required property", "purpose"
        )
    all_files = cls.list(
        api_key=api_key,
        api_base=api_base or openai.file_api_base or openai.api_base,
        api_version=api_version,
        organization=organization,
    ).get("data", [])

    # A file object without a name can never match on filename; decide this
    # once instead of re-checking it for every listed file.
    if not hasattr(file, "name"):
        return []
    local_name = file.name

    # Size of the local file, measured lazily on the first name match so the
    # seek/tell/seek round trip happens at most once per call.
    local_size = None
    matching_files = []
    for remote in all_files:
        if remote["purpose"] != purpose or remote["filename"] != local_name:
            continue
        if local_size is None:
            file.seek(0, os.SEEK_END)
            local_size = file.tell()
            file.seek(0)  # rewind so the caller can still read the file
        if remote["bytes"] == local_size:
            matching_files.append(remote)
    return matching_files

0 commit comments

Comments
 (0)