[{"data":1,"prerenderedAt":205},["ShallowReactive",2],{"DlFXI4Eibt_Bn9lrEZz1TYbHCWFZj3IvqwHQSEW-Exc":3,"442qEOIwRfFJP4735ItNKu50ZNJYbQ2g0yyn3V0CJP0":194},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"hot":39,"new":78,"banner":118,"data":143,"cache":193},[8,9,10],"Agent","OpenAI","LLM",[12,14,17,20,23,25,27,30,33,36],{"title":8,"total":13},39,{"title":15,"total":16},"Google",44,{"title":18,"total":19},"Nvidia",13,{"title":21,"total":22},"Claude",11,{"title":9,"total":24},35,{"title":10,"total":26},85,{"title":28,"total":29},"DeepSeek",9,{"title":31,"total":32},"OCR",1,{"title":34,"total":35},"Chat",7,{"title":37,"total":38},"Generator",116,[40,48,55,64,71],{"id":41,"publish_date":42,"is_original":4,"collection":5,"cover_url":43,"cover_url_1_1":44,"title":45,"summary":46,"author":47},557,"2022-04-29","article_res/cover/7a9b1375ed9bb298154981bae42b794d.jpeg","article_res/cover/afa281dd52bc0454e6735daa8e6b0706.jpeg","Translation and summary of Messari Report [2.8 Kristin Smith, Blockchain Association and Katie Haun, a16z]","We need unity and speed right now.","Translation",{"id":49,"publish_date":50,"is_original":4,"collection":5,"cover_url":51,"cover_url_1_1":52,"title":53,"summary":54,"author":47},531,"2022-05-25","article_res/cover/e8362057f8fa189594c60afdfaaeb6e5.jpeg","article_res/cover/8ea08d0d6fa7eee6b57ed4ec61b61ad6.jpeg","Decentralized Society: Finding Web3’s Soul / Decentralized Society: Finding the Soul of Web3 -7","Decentralization through Pluralism When analyzing ecosystems, it's desirable to measure how decentralized it is.",{"id":56,"publish_date":57,"is_original":32,"collection":58,"cover_url":59,"cover_url_1_1":60,"title":61,"summary":62,"author":63},127,"2024-11-14","#Google #AI Game #World Model #AI Story","article_res/cover/0233a875b7ec2debf59779e311547569.jpeg","article_res/cover/6ffddb6ae4914b3c699493311aa9f198.jpeg","Google Launches \"Unbounded\": A Generative Infinite Character Life Simulation Game","Unbounded: A Generative Infinite Game of Character Life Simulation","Renee's Entrepreneurial Journey",{"id":13,"publish_date":65,"is_original":32,"collection":66,"cover_url":67,"cover_url_1_1":68,"title":69,"summary":70,"author":63},"2025-02-14","#Deep Dive into LLMs #Andrej Karpathy #LLM #Tool Use #Hallucination","article_res/cover/11e858ad6b74dfa80f923d549b62855c.jpeg","article_res/cover/615e1b320f1fc163edc1d2d154a6de33.jpeg","Andrej Karpathy's in-depth explanation of LLM (Part 4): Hallucinations","hallucinations, tool use, knowledge/working memory",{"id":72,"publish_date":73,"is_original":4,"collection":5,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":47},579,"2022-04-07","article_res/cover/39387376ba28447af1eb40576b9df215.jpeg","article_res/cover/02727ede8551ed49901d0abe6d6305b7.jpeg","Messari Report Translation and Summary 【1-7 Surviving the Winter】","I’d be more cautious here: 10 year and 10 hour thinking only.",[79,87,95,103,111],{"id":80,"publish_date":81,"is_original":32,"collection":82,"cover_url":83,"cover_url_1_1":84,"title":85,"summary":86,"author":63},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":88,"publish_date":89,"is_original":32,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":63},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":96,"publish_date":97,"is_original":32,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":63},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":104,"publish_date":105,"is_original":32,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":63},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":112,"publish_date":105,"is_original":32,"collection":113,"cover_url":114,"cover_url_1_1":115,"title":116,"summary":117,"author":63},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[119,127,135],{"id":120,"publish_date":121,"is_original":32,"collection":122,"cover_url":123,"cover_url_1_1":124,"title":125,"summary":126,"author":63},160,"2024-10-04","#Philosophy","article_res/cover/496990c49211e8b7f996b7d39c18168e.jpeg","article_res/cover/14dbaa1ade9cb4316d5829423a900362.jpeg","Time","The fungus of the morning does not know the waxing and waning of the moon, and the cicada does not know the seasons; this is a short life. To the south of the state of Chu there is a dark spirit which regards five hundred years as spring and five hundred years as autumn. In ancient times there was a great tree called the Ming which regarded eight thousand years as spring and eight thousand years as autumn; this is a long life.",{"id":128,"publish_date":129,"is_original":32,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":63},98,"2024-12-17","#AI Video Generator #Sora #Pika","article_res/cover/3b86e85d03fff4f356a3e4cf2bb329c9.jpeg","article_res/cover/5fa5c20ad0b40f8f544d257c0ef02938.jpeg","Pika 2.0 video generation officially released: effect comparison with Sora","今天，我们推出了Pika 2.0模型。卓越的文字对齐效果。惊人的视觉表现。还有✨场景成分✨",{"id":136,"publish_date":137,"is_original":32,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":63},71,"2025-01-14","#Nvidia #World Foundation Model #Cosmos #Physical AI #Embodied AI","article_res/cover/feddf8c832dfb45d28804291f6a42a9e.jpeg","article_res/cover/d6bc2f1186d96b78228c2283a17a3645.jpeg","NVIDIA's Cosmos World Model","Cosmos World Foundation Model Platform for Physical AI",[144,163,188],{"title":8,"items":145},[146,147,155],{"id":104,"publish_date":105,"is_original":32,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":63},{"id":148,"publish_date":149,"is_original":32,"collection":150,"cover_url":151,"cover_url_1_1":152,"title":153,"summary":154,"author":63},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":156,"publish_date":157,"is_original":32,"collection":158,"cover_url":159,"cover_url_1_1":160,"title":161,"summary":162,"author":63},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"title":9,"items":164},[165,172,180],{"id":166,"publish_date":157,"is_original":32,"collection":167,"cover_url":168,"cover_url_1_1":169,"title":170,"summary":171,"author":63},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":173,"publish_date":174,"is_original":4,"collection":175,"cover_url":176,"cover_url_1_1":177,"title":178,"summary":179,"author":63},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":181,"publish_date":182,"is_original":4,"collection":183,"cover_url":184,"cover_url_1_1":185,"title":186,"summary":187,"author":63},417,"2023-08-24","#OpenAI","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"title":10,"items":189},[190,191,192],{"id":88,"publish_date":89,"is_original":32,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":63},{"id":96,"publish_date":97,"is_original":32,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":63},{"id":104,"publish_date":105,"is_original":32,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":63},true,{"code":4,"msg":5,"data":195},{"id":196,"publish_date":197,"is_original":32,"collection":198,"articles_id":199,"cover_url":200,"cover_url_1_1":201,"title":202,"summary":203,"author":63,"content":204},351,"2024-01-04","#Tencent #AI Agent","RTyMIJI0C-EJB8sxp98dzg","article_res/cover/6cbc4e6da87f70a4653a0103da6fea4e.jpeg","article_res/cover/21a5401f37fda9c0a18f9c02caffba20.jpeg","Tencent's AppAgent: A multimodal intelligent agent that automatically interacts with applications","AppAgent: Multimodal Agents as Smartphone Users, an LLM-based multimodal agent framework designed to operate mobile apps","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Tencent released a paper last month, titled \"AppAgent: Multimodal Agents as Smartphone Users,\" with the paper link available at https://arxiv.org/abs/2312.13771.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003229\" data-ratio=\"0.912962962962963\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423812873830.5431710271501713.png\">\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>This agent can perform complex tasks and operate smartphone applications. By simplifying the action space to interact with smartphone apps, it mimics human tapping and swiping interactions, bypassing the need for backend system access, thereby expanding its applicability across various applications.\u003C/p>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"https://res.cooltool.vip/article_res/assets/17423812869340.5479170384283105.mp4\" poster=\"https://res.cooltool.vip/article_res/assets/17423812816710.35882315466030157.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>The core of its functionality lies in its innovative learning approach. The agent learns to navigate and use new applications either through autonomous exploration or by observing human demonstrations. This process generates a knowledge base that the agent uses to execute complex tasks across different applications, demonstrating proficiency in handling various advanced tasks.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>\u003Cstrong>Method\u003C/strong>\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>App Agent operates in two phases: the exploration phase and the deployment phase.\u003C/p>\u003Col data-tool=\"mdnice编辑器\" class=\"list-paddingleft-1\" style='margin-top: 8px;margin-bottom: 8px;padding-left: 25px;width: 557.438px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;'>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">App Agent observes user interface interactions across different applications. After sufficient observation, App Agent becomes adept at using these applications. This knowledge is meticulously documented. Once the learning phase is complete, the agent is ready to take action.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003227\" data-ratio=\"0.7342592592592593\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423812871210.2806356580416518.jpeg\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">App Agent is capable of handling advanced tasks within any supported application. This method enables App Agent to efficiently complete various complex tasks across different applications.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003228\" data-ratio=\"0.912962962962963\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423812871150.11727157846883296.jpeg\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003C/ol>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>\u003Cstrong>DEMO\u003C/strong>\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Demonstration of App Agent's exploration and deployment on applications.\u003C/p>\u003Cul data-tool=\"mdnice编辑器\" class=\"list-paddingleft-1\" style='margin-top: 8px;margin-bottom: 8px;padding-left: 25px;width: 557.438px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;'>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">The demonstration video shows the process of using AppAgent during the deployment phase to follow a user on X (Twitter).\u003C/p>\u003C/section>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"https://res.cooltool.vip/article_res/assets/17423812818230.22262151416093956.mp4\" poster=\"https://res.cooltool.vip/article_res/assets/17423812818290.9216348500921288.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">An interesting experiment showcasing AppAgent's ability to pass CAPTCHA.\u003C/p>\u003C/section>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"https://res.cooltool.vip/article_res/assets/17423812826020.1109877067712075.mp4\" poster=\"https://res.cooltool.vip/article_res/assets/17423812818250.6677591548434423.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">An example of using a grid overlay to locate UI elements not labeled with numerical tags.\u003C/p>\u003C/section>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"https://res.cooltool.vip/article_res/assets/17423812849590.6437960641085598.mp4\" poster=\"https://res.cooltool.vip/article_res/assets/17423812837640.41003032002156714.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/li>\u003C/ul>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",1752585428879]