mirror of
https://github.com/runyanjake/memechain.git
synced 2025-10-04 23:57:29 -07:00
Hook up caption_image tool
This commit is contained in:
parent
8ad6808ee5
commit
6b3cb37fad
41
README.md
41
README.md
@ -32,9 +32,48 @@ Pull model
|
||||
```
|
||||
ollama pull llama3
|
||||
```
|
||||
If failing, check status of ollama process:
|
||||
If the pull fails, check the status of the ollama process (Ubuntu):
|
||||
```
|
||||
sudo service ollama status
|
||||
```
|
||||
Or start the program from search (macOS).
|
||||
|
||||
## Results
|
||||
|
||||
### So This Actually Works!
|
||||
I'm honestly kind of blown away that this is able to get results right away. The agent uses the get_memes tool reliably, but sometimes calls it more than once despite being prompted to use it only once.
|
||||
The problems lie with the caption_image tool: the agent sometimes calls it correctly and sometimes does not, in which case the chat is kicked back to the user, which breaks the flow.
|
||||
However, when the agent is able to call caption_image correctly, we get the results we're after:
|
||||
|
||||
```
|
||||
env➜ memechain git:(main) ✗ python main.py
|
||||
|
||||
> Entering new AgentExecutor chain...
|
||||
Let's get started.
|
||||
|
||||
Thought: To generate an image for the "two buttons" meme, I need to find a template_id that corresponds to this meme. Then, I can use the Caption Image tool to create a new meme with the desired text.
|
||||
|
||||
Action: Get Memes
|
||||
Action Input: None (no input needed)
|
||||
Observation: ID: 181913649, Name: Drake Hotline Bling
|
||||
ID: 87743020, Name: Two Buttons
|
||||
...
|
||||
ID: 398221598, Name: Goose Chase
|
||||
Thought:Thought: Now that I have the list of template_ids and names from Get Memes, I can find the ID for the "two buttons" meme. The name "Two Buttons" matches with the template_id 87743020.
|
||||
|
||||
Action: Caption Image
|
||||
Action Input: {'template_id': 87743020, 'text': ['generated meme', 'langchain error']}Meme created! URL: https://i.imgflip.com/9jt1zu.jpg
|
||||
|
||||
Observation: https://i.imgflip.com/9jt1zu.jpg
|
||||
Thought:I've got the image URL!
|
||||
|
||||
Action: Download Image
|
||||
Action Input: {'url': 'https://i.imgflip.com/9jt1zu.jpg'}
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "/Users/runyanjake/Desktop/repositories/memechain/main.py", line 54, in <module>
|
||||
...
|
||||
File "/Users/runyanjake/Desktop/repositories/memechain/env/lib/python3.13/site-packages/requests/sessions.py", line 792, in get_adapter
|
||||
raise InvalidSchema(f"No connection adapters were found for {url!r}")
|
||||
requests.exceptions.InvalidSchema: No connection adapters were found for "{'url': 'https://i.imgflip.com/9jt1zu.jpg'}"
|
||||
```
|
||||
|
29
main.py
29
main.py
@ -11,30 +11,35 @@ from tools.caption_image import caption_image
|
||||
from tools.download_image import download_image
|
||||
|
||||
system_prompt = """
|
||||
You are an assistant that looks up the numerical template_id of a meme from imgflip.
|
||||
The following tools are available to you:
|
||||
You are an assistant that helps users create memes using the Imgflip API.
|
||||
|
||||
1. get_memes - Does not take any agruments. Returns a list of template_ids (integer) and names (string) which are the titles of the memes that correspond to the template_id.
|
||||
2. caption_image - Given a valid template_id, top text, and bottom text, generates an image with the desired text. Returns the url of the new meme as a string.
|
||||
3. download_image - Given a valid url returned from the caption_image tool, downloads the image we made locally.
|
||||
Your tasks include:
|
||||
1. Searching for the numerical template_id of a requested meme using the "Get Memes" tool.
|
||||
- This tool should only be used once per request.
|
||||
- If the template_id cannot be found, inform the user.
|
||||
|
||||
Use these tools if necessary to answer questions.
|
||||
2. Generating a meme using the "Caption Image" tool once the template_id is found.
|
||||
- The tool input must be valid JSON with the keys: "template_id" (integer) and "text" (list of strings). Keys must be enclosed in double quotes.
|
||||
|
||||
3. Downloading the generated meme using the "Download Image" tool if requested.
|
||||
|
||||
Your tool invocations must match the exact string of one of the tools listed above.
|
||||
"""
|
||||
|
||||
prompt_template = f"""
|
||||
{system_prompt}
|
||||
{system_prompt}
|
||||
|
||||
Question: {{question}}
|
||||
Question: {{question}}
|
||||
|
||||
Answer: Let's think step by step.
|
||||
Answer: Let's think step by step. We should generate exactly one meme given the directions of the user. I will not ask the user for additional input after their request. Once the meme is created, I will conclude our conversation.
|
||||
"""
|
||||
|
||||
prompt = ChatPromptTemplate.from_template(prompt_template)
|
||||
|
||||
tools = [
|
||||
Tool(name="Get Memes", func=get_memes, description="Does not take any agruments. Returns a list of template_ids (integer) and names (string) which are the titles of the memes that correspond to the template_id."),
|
||||
Tool(name="Caption Image", func=caption_image, description="Given a valid template_id, top text, and bottom text, generates an image with the desired text. Returns the url of the new meme as a string."),
|
||||
Tool(name="Download Image", func=download_image, description="Given a valid url returned from the caption_image tool, downloads the image we made locally.")
|
||||
Tool(name="Caption Image", func=caption_image, description="Given a template_id and list of text strings, returns the url of a new meme as a string. Tool input is valid json syntax, with the following keys: 'template_id' (integer) and 'text' (list of strings)."),
|
||||
Tool(name="Download Image", func=download_image, description="Given a valid url returned from the caption_image tool, downloads the image we made locally. Tool input is valid json syntax, with the following key: 'url' (string).")
|
||||
]
|
||||
|
||||
llm = OllamaLLM(model="llama3")
|
||||
@ -46,6 +51,6 @@ agent_executor = initialize_agent(
|
||||
verbose=True
|
||||
)
|
||||
|
||||
response = agent_executor.invoke({"input": "Generate an image for the 'stick poke' meme with the top text 'come on' and the bottom text 'do something'."})
|
||||
response = agent_executor.invoke({"input": "Generate an image for the 'two buttons' meme with first text 'generated meme' and second text 'langchain error'."})
|
||||
print(response)
|
||||
|
||||
|
@ -4,23 +4,31 @@ import requests
|
||||
CAPTION_IMAGE_URL = "https://api.imgflip.com/caption_image"
|
||||
|
||||
def load_config():
|
||||
with open('config.json') as config_file:
|
||||
with open('tools/config.json') as config_file:
|
||||
return json.load(config_file)
|
||||
|
||||
def caption_image(template_id, text0, text1):
|
||||
def caption_image(input_data):
|
||||
# Replace single quotes with double quotes because langchain likes to use single quotes
|
||||
input_data = input_data.replace("'", '"')
|
||||
|
||||
data = json.loads(input_data)
|
||||
template_id = data['template_id']
|
||||
text = data['text']
|
||||
|
||||
config = load_config()
|
||||
username = config['username']
|
||||
password = config['password']
|
||||
|
||||
url = "https://api.imgflip.com/caption_image"
|
||||
url = CAPTION_IMAGE_URL
|
||||
payload = {
|
||||
"template_id": template_id,
|
||||
"username": username,
|
||||
"password": password,
|
||||
"text0": text0,
|
||||
"text1": text1
|
||||
}
|
||||
|
||||
for i in range(len(text)):
|
||||
payload[f'text{i}'] = text[i]
|
||||
|
||||
response = requests.post(url, data=payload)
|
||||
result = response.json()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user