Skip to content

Commit c2be8c4

Browse files
ziltozilto
zilto
authored and
zilto
committed
reviews incorporated
1 parent e031082 commit c2be8c4

File tree

3 files changed

+55
-16
lines changed

3 files changed

+55
-16
lines changed

contrib/hamilton/contrib/user/zilto/llm_generate_code/README.md

+23-4
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,34 @@ This module uses the OpenAI completion API to generate code.
44

55
For any language, you can request `generated_code` to get the generated response. If you are generating Python code, you can execute it in a subprocess by requesting `execution_output` and `execution_error`.
66

7-
# Configuration Options
8-
## Config.when
7+
## Example
8+
```python
9+
from hamilton import driver
10+
import __init__ as llm_generate_code
11+
12+
dr = driver.Builder().with_modules(llm_generate_code).build()
13+
14+
dr.execute(
15+
["execution_output", "execution_error"],
16+
inputs=dict(
17+
query="Retrieve the primary type from a `typing.Annotated` object",
18+
)
19+
)
20+
```
21+
22+
## Configuration Options
23+
### Config.when
924
This module doesn't receive configurations.
1025

11-
## Inputs
26+
### Inputs
1227
- `query`: The query for which you want code generated.
1328
- `api_key`: Set the OpenAI API key to use. If None, read the environment variable `OPENAI_API_KEY`
1429
- `code_language`: Set the code language to generate the response in. Defaults to `python`
1530

16-
## Overrides
31+
### Overrides
1732
- `prompt_template_to_generate_code`: Create a new prompt template with the fields `query` and `code_language`.
1833
- `prompt_to_generate_code`: Manually provide a prompt to generate Python code
34+
35+
## Extension / Limitations
36+
- Executing arbitrary generated code is a security risk. Proceed with caution.
37+
- You need to manually install dependencies for your generated code to be executed (i.e., you need to `pip install pandas` yourself)

contrib/hamilton/contrib/user/zilto/llm_generate_code/__init__.py

+31-11
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,18 @@
1414

1515

1616
def llm_client(api_key: Optional[str] = None) -> openai.OpenAI:
17-
"""Create an OpenAI client"""
17+
"""Create an OpenAI client."""
1818
if api_key is None:
1919
api_key = os.environ.get("OPENAI_API_KEY")
2020

2121
return openai.OpenAI(api_key=api_key)
2222

2323

2424
def prompt_template_to_generate_code() -> str:
25+
"""Prompt template to generate code.
26+
27+
It must include the fields `code_language` and `query`.
28+
"""
2529
return """Write some {code_language} code to solve the user's problem.
2630
2731
Return only python code in Markdown format, e.g.:
@@ -40,36 +44,47 @@ def prompt_template_to_generate_code() -> str:
4044
def prompt_to_generate_code(
4145
prompt_template_to_generate_code: str, query: str, code_language: str = "python"
4246
) -> str:
47+
"""Fill the prompt template with the code language and the user query."""
4348
return prompt_template_to_generate_code.format(
4449
query=query,
4550
code_language=code_language,
4651
)
4752

4853

4954
def response_generated_code(llm_client: openai.OpenAI, prompt_to_generate_code: str) -> str:
55+
"""Call the OpenAI API completion endpoint with the prompt to generate code."""
5056
response = llm_client.completions.create(
5157
model="gpt-3.5-turbo-instruct",
5258
prompt=prompt_to_generate_code,
5359
)
5460
return response.choices[0].text
5561

5662

57-
def generated_code(response_generated_code: str) -> str:
58-
_, _, lower_part = response_generated_code.partition("```python")
63+
def parsed_generated_code(response_generated_code: str, code_language: str = "python") -> str:
64+
"""Retrieve the code section from the generated text."""
65+
_, _, lower_part = response_generated_code.partition(f"```{code_language}")
5966
code_part, _, _ = lower_part.partition("```")
6067
return code_part
6168

6269

63-
def code_prepared_for_execution(generated_code: str, code_language: str = "python") -> str:
70+
def code_prepared_for_execution(parsed_generated_code: str, code_language: str = "python") -> str:
71+
"""If code is Python, append statements that prepare it to be run in a subprocess.
72+
73+
We collect all local variables in a dictionary and filter out Python builtins to keep
74+
only the variables from the generated code. print() is used to send string data from
75+
the subprocess back to the parent process via its `stdout`.
76+
"""
77+
6478
if code_language != "python":
6579
raise ValueError("Can only execute the generated code if `code_language` = 'python'")
6680

67-
code_to_get_vars = """
68-
excluded_vars = { 'excluded_vars', '__builtins__', '__annotations__'} | set(dir(__builtins__))
69-
local_vars = {k:v for k,v in locals().items() if k not in excluded_vars}
70-
print(local_vars)
71-
"""
72-
return generated_code + code_to_get_vars
81+
code_to_get_vars = (
82+
"excluded_vars = { 'excluded_vars', '__builtins__', '__annotations__'} | set(dir(__builtins__))\n"
83+
"local_vars = {k:v for k,v in locals().items() if k not in excluded_vars}\n"
84+
"print(local_vars)"
85+
)
86+
87+
return parsed_generated_code + code_to_get_vars
7388

7489

7590
@extract_fields(
@@ -78,7 +93,12 @@ def code_prepared_for_execution(generated_code: str, code_language: str = "pytho
7893
execution_error=str,
7994
)
8095
)
81-
def execute_output(code_prepared_for_execution: str) -> dict:
96+
def executed_output(code_prepared_for_execution: str) -> dict:
97+
"""Execute the generated Python code + appended utilities in a subprocess.
98+
99+
The output and errors from the code are collected as strings. Executing
100+
the code in a subprocess provides isolation, but isn't a security guarantee.
101+
"""
82102
process = subprocess.Popen(
83103
["python", "-c", code_prepared_for_execution],
84104
stdout=subprocess.PIPE,
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
22
"schema": "1.0",
3-
"use_case_tags": ["LLM", "OpenAI"],
3+
"use_case_tags": ["LLM", "OpenAI", "code generation"],
44
"secondary_tags": {}
55
}

0 commit comments

Comments
 (0)