From 21d80ca68346dfdb8d3556015a723a9217f8566f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 22 Jul 2025 18:32:44 -0700 Subject: [PATCH] initital commit --- .gitignore | 2 + __pycache__/model_tools.cpython-310.pyc | Bin 0 -> 7132 bytes __pycache__/web_tools.cpython-310.pyc | Bin 0 -> 8167 bytes model_tools.py | 272 ++++++++++++++++++++ requirements.txt | 2 + run_agent.py | 324 ++++++++++++++++++++++++ terminal_tool.py | 0 web_tools.py | 265 +++++++++++++++++++ 8 files changed, 865 insertions(+) create mode 100644 .gitignore create mode 100644 __pycache__/model_tools.cpython-310.pyc create mode 100644 __pycache__/web_tools.cpython-310.pyc create mode 100644 model_tools.py create mode 100644 requirements.txt create mode 100644 run_agent.py create mode 100644 terminal_tool.py create mode 100644 web_tools.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..a57ed39ef --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/venv/ +/_pycache/ \ No newline at end of file diff --git a/__pycache__/model_tools.cpython-310.pyc b/__pycache__/model_tools.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..519e30120efd8d130760c2cd9b98a49c3ea6fb0d GIT binary patch literal 7132 zcmc&(O>i8=6`q-$U9DC>mW8k-8H=`LEG@_qkRpK%kQm#5i7^!Br`Uwr8f~|B2hGkd zJu{ZI$SJB6DiuN%6y$~->>NU<3YVO6$}!g*Qn}0_2a<~~gyfP6fspULnb}?8pGu`t zm6f-*XS(~n*RS9E-h1u#*jUNH=lJ2*n!hO<#-He6@H32uZ{r(0VHt)rs)jVB<(X9z zzd6rp<*K=sUA0^JYTl$V+bgt&sza?}wb&Z24!26x65i)kUggw?ve&H{W5!swWI+!7 zz^;y7XQU{HF)}7gYDA9oS7GMJdO?oLF^rDiV02vW!RQ{$C2l>A#1%XzI~-0s+so;c|h$)dmF|s$%7c1d(XI3pw+MZP>q4wEN0%W4yf536Z6ES z98;bJ)g#s0)NS(6j3MV|jOxL(zkvQ6)*My`neLpl9GPh&OJ2q_ndwB%B z7Qrjeyndx7!0UFbTVndRr;;y2a#1~`4$1P2p+C8f^n$q@lHi+3p3HvR3?@MDPI=em zdAwPkF|J?-mn`P@XN(+WPE&Fv{asCe@1|dAo#k)&9_KU+s`O-ADo;GyZhL`1Kk`(m z^lZ}&M2jCpz3m5~j_P3`LQE1ty{TGGARJ$cCK^xaK&-d5IChGbfY!06Pl>wYdBIBQ zR4Ck5r>)b)n5f$=an8|hJ4)yUDoi;gBs;o&&Xo!eBJ$mJS4rXeq0;M4U6qK9RO_zq z`VE#QS)ZrjiJmySnTD?jwML=bgYkV@&OctNeD%a;P>=z4QmsPmzA z>fy4eYiH9dCxULOwHj?YSm|t)O3wvOLsd!wAKnJT4IurK<#6XUe(QKH=8GICsq7 zitW>G5SHzD&sXTh6Z9h0I&+A}=?$wtd7gr$)i-LPbI$d)YEH+kZK$odpj8-YzfQac zKVx_}j&JZwG=M=%=A3SE6Bl!GFTRs_HYMN(D{hth zWZpI9e$1b~NEBw|Y{QZVA9q0X)MR3KCN#bEPx* zE}h9s74D1ny2yy84#lPlXK!_!&|UMC*mT2Y%8WdksmI04hIJlV?tXNZy+?#i%!iQV zpRS+CLtcn~j=u%jJ8peNq+$uJJax|TLv+XjgM?M+hMt16Jon6Lv_PE@ek%gs?Q~R# zH-tBiA8Z0{R>X4wA%p~#IuwwYDK^r9$Rq=8L#w9p1NWTD*sR2b_S(x(SzG|10WG>m zo`u}$Vm56^D!g`kLqr{tJ>6E>itTW#qv8?ZQeC?p(p5Y}u+&hx`@oaVdAAj{gderm zlqTI~8w0t-TE!Av+refl;=X0ka?V%8J$IL{k`k|))KCgg&TBGaJ&qG0E z>jIkY8z&PSWOZxhNLIpn4~hwc}Ht!izVz<$b?C?WFB-(DRS(VY6&V5_a$?n1&#vc zvYn_V?}n-sBswT!;wgHJ(bF5Uz3InxGi-U?ub)r>=}fevuoH!}6F3)dC}oMU%sQ9a zYzvN8&h&39SON1OtBJH1#D#=}o+l*_k&-_=PB8`%4hje$RpJ0WmAYxbrQktZMZ6=^ zm%TlQj)rpka-{x%0F2pWmY(M}lBnkcJR1&J46g4v;GnPai8yuL4#1TB0)nk{zdvQ?~0-3tI+w!ExEDXehI0Ixs|cKm^%h zuvm`s*pyq3iy4S?*G@2ez{FCCh|p27$nzH20D_3qknH$DKU0)TP;K~b)LMO7 zw+YvR)yI`?so>}_I{ejv^aSZkJPnhn!QigRPY!~1pwU>vmSvgv<}B0xmz77K|LlL_ zZ_NIy{RQ66WOEA1*yq*|^)U92Yv*Kih*~z^{<}S4e?qkE-|4%+c2^`PFwEi0k$eiB zhH=5zHZB{Xxq~#SiGo-M{1UT2t!kTaz4Xwyw zlI(O>a7;Yp8eYg?k?9h>ezKXm20~jFx|hWo(1hG(O_7BuU&4XeUsErF(`gpIPmrnJ zG@2hrr1O9U!b|b-=DJ{lK&C<>0qehP#(7RH2dwN)Xg;1Ii=mOhY&O71K%v4ejppnj zy+Sj`GX2#k%nUpNsDe7GjNXTeS&DjmzQ09h8@bwsr4MeKmr<6;+!gDB#eHO6=;uOH z7D7u7ZRd8Z9eU1fThu>GD~nk9FwY)t7?^E`_O>17cPxGHwvGHPPvx0!!iH>~HFCwg zkPi#n_BQ4)g;7v=k;lfEcY*ukm^Z||J#qqci{a3AVS8x1xPyExEM6X#d*$Rdm1UM| zUNK>jw{-u;y1hZ>O{kYy!a$;S4C?7;X}gptmXM^BE8+|kN^vjEMfy2C0cci7kn#0a zlg`BwWqVi-4vdRQiAF@Sc`WYr4Gf$s;?b;>O6vrad{3Nx>WoNoCtk=&Ag3R(q4(o3 z^#nD0shOmP?1baq8NEPVihc}fdXaid)Eq_==atrN-QD+`zu_Ymv$8TlE23PAbFjrY zhp3Il^_g*8auG6ukaED-Ci_qL5tG<{IY5qI5KLEniE#ZELx-HEPj(-32#TushqB0(k?f!@q&^r zc7u1&>CTZOoHO)D2*chscdQoxbka(U?1BY1fZ=CVerR1rakI_FNAqkWw6`8NjSD#w zlw_V(!EN$z6uKF(Z@#`@ylBXw3%T#ZxRc*abJHjnok!??fNbkY2ZcwrcQ!jA-2jyW zDl}jg#dD-?1e#p;6RbHM%0#tGxd)?54<(u6Zq!9sM&xpR5ZG+G^=7t0IDM{b%4_?u zY@&@^pvOO8dSuzen}(@B$UZqgy&{rp4QB&v+)yYdpwa|ogcQEo%y=gBsQST-G8 zlyeZ#7O8=_BuPkmEAij^b5Em?;{WZ-ti@*>7lll`LMjue z>cMK#43a^lQk1iaQMKY%74KAEwMa|D}q-{Iq77Q5C88nLOa32~`!XM(U_e*}`jG#YD? z{JAtdZcdov)|5GJ&YH#7tS|H<7_8&l`%v0K`(uBeLr0pSv4iAAT0ev5N*iT={-W)g zl*}Mp6uQ4)-B9W}kj)Bbjg0Qv7)~hxn?20X)xcmUjMAhdIk>M@`~?3MacZCzPe?0g zz`K$xg^n!Eo?Oojw-P>d>&2ww;Vfwk^f$4yZsi3;vZQM6*XjM7IjPjKhASH`7ubJ@ zV0Yo!W6z&D{bCIl3bpT?d{HR>oU7YDW$t_{NC_M+i-vpIoIVPcNzFEzma{ZD_|Viv zj39R@X6W+Ov)xDOOOFsIGNi^oU|B$M$|~9>r>>^`x&5VG(vRciu2aRcLCFiBD#3Mf zsxAe5tNSBX&vCk=67D)5+MtA7G4$B?&_y^JK%#DXU6UN%Sw3WPtieOW#m^uYmO+z% z`cZ27v5R{gz-H5XG&%vOWN`UatO>~V3Pt-%eF~kpSgR4v)oRfJJbwEAdzb(6>$eaB zuDK@eEaA7v%VVff>VuP48k-t1aJd}a#$Ac-@4EKBUW zQ%|}91I*wKBFZ~TpT7T-4@7TfdYiq}Ekf-fs`HAb)6*md+fqa@B*drhzx$@R`ZW68 z+{(&|z6U*)74+Wyc~95@FRJJhG<%YQ;;uc2uC*-mNg_a13kbY(1-DdmF>>;}Lpge- zd*Xj_Z`=iEw)h)f1a}`3=>^n{?sgZ+qZ1Eiw?L)*>LOn$Emp*0a<{v9pd%5ds=L-!617!M}-6U zJVdiejhf2RcqnylRy*C>5$-W2#5qsl&W6V0VS?|Z^sxDg?lB@jrybjbuE+-4NE)x_ z8^s4vCgcA+9wr12PNT8S;?$Hmotw?gK0mjAZeea|-&ASTGK;yQS-^J!Dcxc8rpzLz YclV}mC(Jpdebc7(TG1-3u3Vl%ls9qQ^wsx zN4YTrbL=I-&e~EmrM2QHf;^1YT(sN;+g(t@wnDcV*q+OLd%?AWPzq31KDlh!jx`Uu z)vD^{Nm^pV7nW=;HmI)7uBaC^2ra+mNoKn!+ZK(N^bo6ow3CbW6mHnvVHVF2Cu6?0T8u zQcEIU#o}PMJD!9t#tuGv_z8cP*Nw=h~&VwWITK0C2>dCjxoqwls^3u7+4`<%N_oCIY8%ts> zG6O6vDo6oUaGNlw{maBt9{$@!4-ZE*oFJ3;*gxmDE;LL85*vGO%^OI6b!9yr};K6KQl_Asx)`>NpFn}SL z+Soc}nEGnm0#kg|QmU!^hsx!i5j=S(`OzXtd)Kt8C zn_FD(h{rYYmluw$ww-MT%FJ{vSnG(nOdD1uB`15;vB*p=3K+?Z{G5~v3CW~(#I-uR z3d5=RmkKNR*n|eSid)n!*#z3$Oz+dw2K%7;D%+B5P_i*nbR1Y-9@`!Dyy!VxNMDWD zJ@#&vY1e-A>fEt(CD%?)&RUb*7baghHhp65*t@mvcy`r~XxiO-?bOLDGcO-+oV#%H zwVB4{nG0{cHFIXJW<-V*QO*zNYo;tf5TgSzxeX${q1ff)MsngtmE>FcKBO2Tc!L)E zxh97(!2K7d7rnNa4#bjchwbT0(z`C2fj|8NA=|>AJPDi&ru$ZHs8O=;F|@X(zMpj|!fGr)6C zN13~sC%_K$>qbw5u9-Z4BS)IY3qh`D+%|b}LAzsoNNr0ubE<9rdZA~K19QWGRu+4x zeOtStnOaaeTx576W-D;875pVMrDH3eke}7^9LHOs#<33p2Pig#M5wxc+^0LO zVPk4(V|D5?vCZdX7Bsl_;8z-Dn=#YD#{xTW#6!pKHw&fn_(q8xe0)Q{=}DTj<#<-G zJv+-_uoK=;e)!BW$7L_c%Y`L)EwVd3E&e34o zT5Tj76%EDW(cmCuF(g~`#jk(L&H{Bgi;79BZYdSbyJWpS*P{Z!7M8KQXGTg88xKOX zfQ~x*2YC!%-29kDc|qSV*2+=71HlZU;n?kLG-H3H5ciC7j&P&gf(Rn>y6?GBo`>y@ zuWV>E&}S4(KUA%tmLW%JEhSLc@mya-gE6fQ)m<0SfSS3{_7{|)kBbWR*+J$!SvL8q zucOl6hr&3#S0Bk8GAer6sGzLq2lUbh#=qqA)Xjw4`B5SE88%sQ3PXu=%8LIL58Vr7 z#RJ%JWtWY_E*sb3DUrVfyG$0`L<_Ru`Mw1R^!&uDW8$(JHK&r5fVb?QTJuH7h6Hx@OW?v28T=;%4@6mwC&q?N4g33&0+G7^SFj-? z#ZQP2pM1n-617Ua0a)ue%<2|N{HlJfX2@eOR&oMGcn}3y{-jrC=T%%UpThV11w8i~`}B&j z1wOuwMo}ICgWZg09?IrzfOUkuO_OVYy>X_1#F@zBX(`yYPJ zPNxcXcQ;3*nTO?DjM455x-PK^jLP`D;1sgLS7 zO?`^VXnh#__!mrmi`u+M#Rh!si{Y!kWvQ-mlS`z?C-7TJ`~eV zp+gh@O&5iIr}6hG&I*vUx{VXKJNkz@Qo%;|Q)LlPFN)?8vs)@^jdRo4Sa%~!ggmWq zmu=~}ZSq|xXG`ZYIHVy%qKXIdAoJ>c=Bf+@&5#M95k60_vz7z%sLqO35Q?iIB+*>U zMJ9EP3=a_=qEJkn!GB5=N?KC1yGhhHk{CA0oc{%1e2R-JMR}H5l^TtV%Hz)D8WmB3o=zi}KQoL9;IKbAtu%B3{!|JLNn$3fq7^g;) z$UIEqNKK4)kCL*boV1XLN|7UGf;vCC3lBvLNiRsZwrpyPatR?x`@LJA-6nNbXN>VF zkrWnRQ^#1&{)Xu6NIUW#>xWX_h82wmCc+PY#cQFz_H}jz3=OzQp>` z&vr*Y<0&IDhcn}IweBf&BYX9bvz+m4^9eSt&T?=d7m}2#1H|!KO_w9!S53cIGcL;S z(Bqz{*l3_xqY=BiXHY3iD53(+58!-Al0dwuQ$0z}q@1CG?2ht=<77$>2(9uNlDtw$ zYAi`*`;zNgZGlP1;f_qPY6sgYLg~Blf$;Ze=Y29zrd~Q+(o6afej~Zj+`c1+rVbsi Xyjm(7dMQ`Z3-~8LqyL1q=HLDe`A_ZV literal 0 HcmV?d00001 diff --git a/model_tools.py b/model_tools.py new file mode 100644 index 000000000..6e4d828d6 --- /dev/null +++ b/model_tools.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 +""" +Model Tools Module + +This module constructs tool schemas and handlers for AI model API calls. +It imports tools from various toolset modules and provides a unified interface +for defining tools and executing function calls. + +Currently supports: +- Web tools (search, extract, crawl) from web_tools.py + +Usage: + from model_tools import get_tool_definitions, handle_function_call + + # Get tool definitions for model API + tools = get_tool_definitions() + + # Handle function calls from model + result = handle_function_call("web_search_tool", {"query": "Python", "limit": 3}) +""" + +import json +from typing import Dict, Any, List + +# Import toolsets +from web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_tavily_api_key + +def get_web_tool_definitions() -> List[Dict[str, Any]]: + """ + Get tool definitions for web tools in OpenAI's expected format. + + Returns: + List[Dict]: List of web tool definitions compatible with OpenAI API + """ + return [ + { + "type": "function", + "function": { + "name": "web_search_tool", + "description": "Search the web for information on any topic. Returns relevant results with titles, URLs, content snippets, and answers. Uses advanced search depth for comprehensive results.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query to look up on the web" + }, + "limit": { + "type": "integer", + "description": "Maximum number of results to return (default: 5, max: 10)", + "default": 5, + "minimum": 1, + "maximum": 10 + } + }, + "required": ["query"] + } + } + }, + { + "type": "function", + "function": { + "name": "web_extract_tool", + "description": "Extract and read the full content from specific web page URLs. Useful for getting detailed information from webpages found through search.", + "parameters": { + "type": "object", + "properties": { + "urls": { + "type": "array", + "items": {"type": "string"}, + "description": "List of URLs to extract content from (max 5 URLs per call)", + "maxItems": 5 + }, + "format": { + "type": "string", + "enum": ["markdown", "html"], + "description": "Desired output format for extracted content (optional)" + } + }, + "required": ["urls"] + } + } + }, + { + "type": "function", + "function": { + "name": "web_crawl_tool", + "description": "Crawl a website with specific instructions to find and extract targeted content. Uses AI to intelligently navigate and extract relevant information from across the site.", + "parameters": { + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "The base URL to crawl (can include or exclude https://)" + }, + "instructions": { + "type": "string", + "description": "Specific instructions for what to crawl/extract using AI intelligence (e.g., 'Find pricing information', 'Get documentation pages', 'Extract contact details')" + }, + "depth": { + "type": "string", + "enum": ["basic", "advanced"], + "description": "Depth of extraction - 'basic' for surface content, 'advanced' for deeper analysis (default: basic)", + "default": "basic" + } + }, + "required": ["url"] + } + } + } + ] + +def get_tool_definitions() -> List[Dict[str, Any]]: + """ + Get all available tool definitions for model API calls. + + This function aggregates tool definitions from all available toolsets. + Currently includes web tools, but can be extended to include other toolsets. + + Returns: + List[Dict]: Complete list of all available tool definitions + """ + tools = [] + + # Add web tools + tools.extend(get_web_tool_definitions()) + + # Future toolsets can be added here: + # tools.extend(get_file_tool_definitions()) + # tools.extend(get_code_tool_definitions()) + # tools.extend(get_database_tool_definitions()) + + return tools + +def handle_web_function_call(function_name: str, function_args: Dict[str, Any]) -> str: + """ + Handle function calls for web tools. + + Args: + function_name (str): Name of the web function to call + function_args (Dict): Arguments for the function + + Returns: + str: Function result as JSON string + """ + if function_name == "web_search_tool": + query = function_args.get("query", "") + limit = function_args.get("limit", 5) + # Ensure limit is within bounds + limit = max(1, min(10, limit)) + return web_search_tool(query, limit) + + elif function_name == "web_extract_tool": + urls = function_args.get("urls", []) + # Limit URLs to prevent abuse + urls = urls[:5] if isinstance(urls, list) else [] + format = function_args.get("format") + return web_extract_tool(urls, format) + + elif function_name == "web_crawl_tool": + url = function_args.get("url", "") + instructions = function_args.get("instructions") + depth = function_args.get("depth", "basic") + return web_crawl_tool(url, instructions, depth) + + else: + return json.dumps({"error": f"Unknown web function: {function_name}"}) + +def handle_function_call(function_name: str, function_args: Dict[str, Any]) -> str: + """ + Main function call dispatcher that routes calls to appropriate toolsets. + + This function determines which toolset a function belongs to and dispatches + the call to the appropriate handler. This makes it easy to add new toolsets + without changing the main calling interface. + + Args: + function_name (str): Name of the function to call + function_args (Dict): Arguments for the function + + Returns: + str: Function result as JSON string + + Raises: + None: Returns error as JSON string instead of raising exceptions + """ + try: + # Route web tools + if function_name in ["web_search_tool", "web_extract_tool", "web_crawl_tool"]: + return handle_web_function_call(function_name, function_args) + + # Future toolsets can be routed here: + # elif function_name in ["file_read_tool", "file_write_tool"]: + # return handle_file_function_call(function_name, function_args) + # elif function_name in ["code_execute_tool", "code_analyze_tool"]: + # return handle_code_function_call(function_name, function_args) + + else: + error_msg = f"Unknown function: {function_name}" + print(f"āŒ {error_msg}") + return json.dumps({"error": error_msg}) + + except Exception as e: + error_msg = f"Error executing {function_name}: {str(e)}" + print(f"āŒ {error_msg}") + return json.dumps({"error": error_msg}) + +def get_available_toolsets() -> Dict[str, Dict[str, Any]]: + """ + Get information about all available toolsets and their status. + + Returns: + Dict: Information about each toolset including availability and tools + """ + toolsets = { + "web_tools": { + "available": check_tavily_api_key(), + "tools": ["web_search_tool", "web_extract_tool", "web_crawl_tool"], + "description": "Web search, content extraction, and website crawling tools", + "requirements": ["TAVILY_API_KEY environment variable"] + } + # Future toolsets can be added here + } + + return toolsets + +def check_toolset_requirements() -> Dict[str, bool]: + """ + Check if all requirements for available toolsets are met. + + Returns: + Dict: Status of each toolset's requirements + """ + return { + "web_tools": check_tavily_api_key() + } + +if __name__ == "__main__": + """ + Simple test/demo when run directly + """ + print("šŸ› ļø Model Tools Module") + print("=" * 40) + + # Check toolset requirements + requirements = check_toolset_requirements() + print("šŸ“‹ Toolset Requirements:") + for toolset, available in requirements.items(): + status = "āœ…" if available else "āŒ" + print(f" {status} {toolset}: {'Available' if available else 'Missing requirements'}") + + # Show available tools + tools = get_tool_definitions() + print(f"\nšŸ”§ Available Tools ({len(tools)} total):") + for tool in tools: + func_name = tool["function"]["name"] + desc = tool["function"]["description"] + print(f" šŸ“Œ {func_name}: {desc[:80]}{'...' if len(desc) > 80 else ''}") + + # Show toolset info + toolsets = get_available_toolsets() + print(f"\nšŸ“¦ Toolset Information:") + for name, info in toolsets.items(): + status = "āœ…" if info["available"] else "āŒ" + print(f" {status} {name}: {info['description']}") + if not info["available"]: + print(f" Requirements: {', '.join(info['requirements'])}") + + print("\nšŸ’” Usage Example:") + print(" from model_tools import get_tool_definitions, handle_function_call") + print(" tools = get_tool_definitions()") + print(" result = handle_function_call('web_search_tool', {'query': 'Python'})") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..a78b5a2e8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +tavily-python +openai \ No newline at end of file diff --git a/run_agent.py b/run_agent.py new file mode 100644 index 000000000..729682513 --- /dev/null +++ b/run_agent.py @@ -0,0 +1,324 @@ +#!/usr/bin/env python3 +""" +AI Agent Runner with Tool Calling + +This module provides a clean, standalone agent that can execute AI models +with tool calling capabilities. It handles the conversation loop, tool execution, +and response management. + +Features: +- Automatic tool calling loop until completion +- Configurable model parameters +- Error handling and recovery +- Message history management +- Support for multiple model providers + +Usage: + from run_agent import AIAgent + + agent = AIAgent(base_url="http://localhost:30000/v1", model="claude-opus-4-20250514") + response = agent.run_conversation("Tell me about the latest Python updates") +""" + +import json +import os +import time +from typing import List, Dict, Any, Optional +from openai import OpenAI + +# Import our tool system +from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements + + +class AIAgent: + """ + AI Agent with tool calling capabilities. + + This class manages the conversation flow, tool execution, and response handling + for AI models that support function calling. + """ + + def __init__( + self, + base_url: str = None, + api_key: str = None, + model: str = "gpt-4", + max_iterations: int = 10, + tool_delay: float = 1.0 + ): + """ + Initialize the AI Agent. + + Args: + base_url (str): Base URL for the model API (optional) + api_key (str): API key for authentication (optional, uses env var if not provided) + model (str): Model name to use (default: "gpt-4") + max_iterations (int): Maximum number of tool calling iterations (default: 10) + tool_delay (float): Delay between tool calls in seconds (default: 1.0) + """ + self.model = model + self.max_iterations = max_iterations + self.tool_delay = tool_delay + + # Initialize OpenAI client + client_kwargs = {} + if base_url: + client_kwargs["base_url"] = base_url + if api_key: + client_kwargs["api_key"] = api_key + else: + client_kwargs["api_key"] = os.getenv("OPENAI_API_KEY", "dummy-key") + + try: + self.client = OpenAI(**client_kwargs) + print(f"šŸ¤– AI Agent initialized with model: {self.model}") + if base_url: + print(f"šŸ”— Using custom base URL: {base_url}") + except Exception as e: + raise RuntimeError(f"Failed to initialize OpenAI client: {e}") + + # Get available tools + self.tools = get_tool_definitions() + print(f"šŸ› ļø Loaded {len(self.tools)} tools") + + # Check tool requirements + requirements = check_toolset_requirements() + missing_reqs = [name for name, available in requirements.items() if not available] + if missing_reqs: + print(f"āš ļø Some tools may not work due to missing requirements: {missing_reqs}") + + def create_system_message(self, custom_system: str = None) -> str: + """ + Create the system message for the agent. + + Args: + custom_system (str): Custom system message (optional) + + Returns: + str: System message content + """ + if custom_system: + return custom_system + + return ( + "You are an AI assistant that provides helpful responses. You may use extremely long chains of thought " + "to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help " + "come to a correct solution prior to answering. You should enclose your thoughts and internal monologue " + "inside tags.\n\n" + "You are equipped with web research tools that allow you to search the web, extract content from web pages, " + "and crawl websites. Use these tools to gather current information and provide accurate, well-researched responses. " + "You can call multiple tools in parallel if they are not reliant on each other's results. You can also use " + "sequential tool calls to build on data you've collected from previous tool calls. Continue using tools until " + "you feel confident you have enough information to provide a comprehensive answer." + ) + + def run_conversation( + self, + user_message: str, + system_message: str = None, + conversation_history: List[Dict[str, Any]] = None + ) -> Dict[str, Any]: + """ + Run a complete conversation with tool calling until completion. + + Args: + user_message (str): The user's message/question + system_message (str): Custom system message (optional) + conversation_history (List[Dict]): Previous conversation messages (optional) + + Returns: + Dict: Complete conversation result with final response and message history + """ + # Initialize conversation + messages = conversation_history or [] + + # Add system message if not already present + if not messages or messages[0]["role"] != "system": + messages.insert(0, { + "role": "system", + "content": self.create_system_message(system_message) + }) + + # Add user message + messages.append({ + "role": "user", + "content": user_message + }) + + print(f"šŸ’¬ Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'") + + # Main conversation loop + api_call_count = 0 + final_response = None + + while api_call_count < self.max_iterations: + api_call_count += 1 + print(f"\nšŸ”„ Making API call #{api_call_count}...") + + try: + # Make API call with tools + response = self.client.chat.completions.create( + model=self.model, + messages=messages, + tools=self.tools if self.tools else None + ) + + assistant_message = response.choices[0].message + + # Handle assistant response + if assistant_message.content: + print(f"šŸ¤– Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}") + + # Check for tool calls + if assistant_message.tool_calls: + print(f"šŸ”§ Processing {len(assistant_message.tool_calls)} tool call(s)...") + + # Add assistant message with tool calls to conversation + messages.append({ + "role": "assistant", + "content": assistant_message.content, + "tool_calls": [ + { + "id": tool_call.id, + "type": tool_call.type, + "function": { + "name": tool_call.function.name, + "arguments": tool_call.function.arguments + } + } + for tool_call in assistant_message.tool_calls + ] + }) + + # Execute each tool call + for i, tool_call in enumerate(assistant_message.tool_calls, 1): + function_name = tool_call.function.name + + try: + function_args = json.loads(tool_call.function.arguments) + except json.JSONDecodeError as e: + print(f"āŒ Invalid JSON in tool call arguments: {e}") + function_args = {} + + print(f" šŸ“ž Tool {i}: {function_name}({list(function_args.keys())})") + + # Execute the tool + function_result = handle_function_call(function_name, function_args) + + # Add tool result to conversation + messages.append({ + "role": "tool", + "content": function_result, + "tool_call_id": tool_call.id + }) + + print(f" āœ… Tool {i} completed") + + # Delay between tool calls + if self.tool_delay > 0 and i < len(assistant_message.tool_calls): + time.sleep(self.tool_delay) + + # Continue loop for next response + continue + + else: + # No tool calls - this is the final response + final_response = assistant_message.content or "" + + # Add final assistant message + messages.append({ + "role": "assistant", + "content": final_response + }) + + print(f"šŸŽ‰ Conversation completed after {api_call_count} API call(s)") + break + + except Exception as e: + error_msg = f"Error during API call #{api_call_count}: {str(e)}" + print(f"āŒ {error_msg}") + + # Add error to conversation and try to continue + messages.append({ + "role": "assistant", + "content": f"I encountered an error: {error_msg}. Let me try a different approach." + }) + + # If we're near the limit, break to avoid infinite loops + if api_call_count >= self.max_iterations - 1: + final_response = f"I apologize, but I encountered repeated errors: {error_msg}" + break + + # Handle max iterations reached + if api_call_count >= self.max_iterations: + print(f"āš ļø Reached maximum iterations ({self.max_iterations}). Stopping to prevent infinite loop.") + if final_response is None: + final_response = "I've reached the maximum number of iterations. Here's what I found so far." + + return { + "final_response": final_response, + "messages": messages, + "api_calls": api_call_count, + "completed": final_response is not None + } + + def chat(self, message: str) -> str: + """ + Simple chat interface that returns just the final response. + + Args: + message (str): User message + + Returns: + str: Final assistant response + """ + result = self.run_conversation(message) + return result["final_response"] + + +def main(): + """ + Main function for running the agent directly. + """ + print("šŸ¤– AI Agent with Tool Calling") + print("=" * 50) + + # Initialize agent with local SGLang server (modify as needed) + try: + agent = AIAgent( + base_url="https://api.anthropic.com/v1/", + model="claude-opus-4-20250514" + ) + except RuntimeError as e: + print(f"āŒ Failed to initialize agent: {e}") + return + + # Example conversation + user_query = ( + "Tell me about the latest developments in Python 3.12 and what new features " + "developers should know about. Please search for current information." + ) + + print(f"\nšŸ“ User Query: {user_query}") + print("\n" + "=" * 50) + + # Run conversation + result = agent.run_conversation(user_query) + + print("\n" + "=" * 50) + print("šŸ“‹ CONVERSATION SUMMARY") + print("=" * 50) + print(f"āœ… Completed: {result['completed']}") + print(f"šŸ“ž API Calls: {result['api_calls']}") + print(f"šŸ’¬ Messages: {len(result['messages'])}") + + if result['final_response']: + print(f"\nšŸŽÆ FINAL RESPONSE:") + print("-" * 30) + print(result['final_response']) + + print("\nšŸ‘‹ Agent execution completed!") + + +if __name__ == "__main__": + main() diff --git a/terminal_tool.py b/terminal_tool.py new file mode 100644 index 000000000..e69de29bb diff --git a/web_tools.py b/web_tools.py new file mode 100644 index 000000000..07d11b790 --- /dev/null +++ b/web_tools.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python3 +""" +Standalone Web Tools Module + +This module provides generic web tools that work with multiple backend providers. +Currently uses Tavily as the backend, but the interface makes it easy to swap +to other providers like Firecrawl without changing the function signatures. + +Available tools: +- web_search_tool: Search the web for information +- web_extract_tool: Extract content from specific web pages +- web_crawl_tool: Crawl websites with specific instructions + +Backend compatibility: +- Tavily: https://docs.tavily.com/ +- Firecrawl: https://docs.firecrawl.dev/features/search + +Usage: + from web_tools import web_search_tool, web_extract_tool, web_crawl_tool + + # Search the web + results = web_search_tool("Python machine learning libraries", limit=3) + + # Extract content from URLs + content = web_extract_tool(["https://example.com"], format="markdown") + + # Crawl a website + crawl_data = web_crawl_tool("example.com", "Find contact information") +""" + +#TODO: Search Capabilities over the scraped pages +#TODO: Store the pages in something +#TODO: Tool to see what pages are available/saved to search over + +import json +import os +import re +from typing import List +from tavily import TavilyClient + +# Initialize Tavily client once at module level +tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY")) + + +def clean_base64_images(text: str) -> str: + """ + Remove base64 encoded images from text to reduce token count and clutter. + + This function finds and removes base64 encoded images in various formats: + - (data:image/png;base64,...) + - (data:image/jpeg;base64,...) + - (data:image/svg+xml;base64,...) + - data:image/[type];base64,... (without parentheses) + + Args: + text: The text content to clean + + Returns: + Cleaned text with base64 images replaced with placeholders + """ + # Pattern to match base64 encoded images wrapped in parentheses + # Matches: (data:image/[type];base64,[base64-string]) + base64_with_parens_pattern = r'\(data:image/[^;]+;base64,[A-Za-z0-9+/=]+\)' + + # Pattern to match base64 encoded images without parentheses + # Matches: data:image/[type];base64,[base64-string] + base64_pattern = r'data:image/[^;]+;base64,[A-Za-z0-9+/=]+' + + # Replace parentheses-wrapped images first + cleaned_text = re.sub(base64_with_parens_pattern, '[BASE64_IMAGE_REMOVED]', text) + + # Then replace any remaining non-parentheses images + cleaned_text = re.sub(base64_pattern, '[BASE64_IMAGE_REMOVED]', cleaned_text) + + return cleaned_text + + +def web_search_tool(query: str, limit: int = 5) -> str: + """ + Search the web for information using available search API backend. + + This function provides a generic interface for web search that can work + with multiple backends. Currently uses Tavily but can be easily swapped. + + Args: + query (str): The search query to look up + limit (int): Maximum number of results to return (default: 5) + + Returns: + str: JSON string containing search results with the following structure: + { + "query": str, + "results": [ + { + "title": str, + "url": str, + "content": str, + "score": float + }, + ... + ] + } + + Raises: + Exception: If search fails or API key is not set + """ + try: + print(f"šŸ” Searching the web for: '{query}' (limit: {limit})") + + # Use Tavily's search functionality + response = tavily_client.search(query=query, max_results=limit, search_depth="advanced") + + print(f"āœ… Found {len(response.get('results', []))} results") + result_json = json.dumps(response, indent=2) + # Clean base64 images from search results + return clean_base64_images(result_json) + + except Exception as e: + error_msg = f"Error searching web: {str(e)}" + print(f"āŒ {error_msg}") + return json.dumps({"error": error_msg}) + + +def web_extract_tool(urls: List[str], format: str = None) -> str: + """ + Extract content from specific web pages using available extraction API backend. + + This function provides a generic interface for web content extraction that + can work with multiple backends. Currently uses Tavily but can be easily swapped. + + Args: + urls (List[str]): List of URLs to extract content from + format (str): Desired output format ("markdown" or "html", optional) + + Returns: + str: JSON string containing extracted content with the following structure: + { + "results": [ + { + "url": str, + "title": str, + "raw_content": str, + "content": str + }, + ... + ] + } + + Raises: + Exception: If extraction fails or API key is not set + """ + try: + print(f"šŸ“„ Extracting content from {len(urls)} URL(s)") + + # Use Tavily's extract functionality + response = tavily_client.extract(urls=urls, format=format) + + print(f"āœ… Extracted content from {len(response.get('results', []))} pages") + + # Print summary of extracted pages for debugging + for result in response.get('results', []): + url = result.get('url', 'Unknown URL') + content_length = len(result.get('raw_content', '')) + print(f" šŸ“ {url} ({content_length} characters)") + + result_json = json.dumps(response, indent=2) + # Clean base64 images from extracted content + return clean_base64_images(result_json) + + except Exception as e: + error_msg = f"Error extracting content: {str(e)}" + print(f"āŒ {error_msg}") + return json.dumps({"error": error_msg}) + + +def web_crawl_tool(url: str, instructions: str = None, depth: str = "basic") -> str: + """ + Crawl a website with specific instructions using available crawling API backend. + + This function provides a generic interface for web crawling that can work + with multiple backends. Currently uses Tavily but can be easily swapped. + + Args: + url (str): The base URL to crawl (can include or exclude https://) + instructions (str): Instructions for what to crawl/extract using LLM intelligence (optional) + depth (str): Depth of extraction ("basic" or "advanced", default: "basic") + + Returns: + str: JSON string containing crawled content with the following structure: + { + "results": [ + { + "url": str, + "title": str, + "content": str + }, + ... + ] + } + + Raises: + Exception: If crawling fails or API key is not set + """ + try: + instructions_text = f" with instructions: '{instructions}'" if instructions else "" + print(f"šŸ•·ļø Crawling {url}{instructions_text}") + + # Use Tavily's crawl functionality + response = tavily_client.crawl( + url=url, + limit=20, # Reasonable limit for most use cases + instructions=instructions or "Get all available content", + extract_depth=depth + ) + + print(f"āœ… Crawled {len(response.get('results', []))} pages") + + # Print summary of crawled pages for debugging + for result in response.get('results', []): + page_url = result.get('url', 'Unknown URL') + content_length = len(result.get('content', '')) + print(f" 🌐 {page_url} ({content_length} characters)") + + result_json = json.dumps(response, indent=2) + # Clean base64 images from crawled content + return clean_base64_images(result_json) + + except Exception as e: + error_msg = f"Error crawling website: {str(e)}" + print(f"āŒ {error_msg}") + return json.dumps({"error": error_msg}) + + +# Convenience function to check if API key is available +def check_tavily_api_key() -> bool: + """ + Check if the Tavily API key is available in environment variables. + + Returns: + bool: True if API key is set, False otherwise + """ + return bool(os.getenv("TAVILY_API_KEY")) + + +if __name__ == "__main__": + """ + Simple test/demo when run directly + """ + print("🌐 Standalone Web Tools Module") + print("=" * 40) + + # Check if API key is available + if not check_tavily_api_key(): + print("āŒ TAVILY_API_KEY environment variable not set") + print("Please set your API key: export TAVILY_API_KEY='your-key-here'") + print("Get API key at: https://tavily.com/") + exit(1) + + print("āœ… Tavily API key found") + print("šŸ› ļø Web tools ready for use!") + print("\nExample usage:") + print(" from web_tools import web_search_tool, web_extract_tool, web_crawl_tool") + print(" results = web_search_tool('Python tutorials')") + print(" content = web_extract_tool(['https://example.com'])") + print(" crawl_data = web_crawl_tool('example.com', 'Find documentation')")