[{"data":1,"prerenderedAt":205},["ShallowReactive",2],{"DlFXI4Eibt_Bn9lrEZz1TYbHCWFZj3IvqwHQSEW-Exc":3,"WuziK3ylIqa_Gr2eIKPPWyYuUcTaziR7MBWVZqtbloE":194},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"hot":39,"new":78,"banner":118,"data":143,"cache":193},[8,9,10],"Agent","OpenAI","LLM",[12,14,17,20,23,25,27,30,33,36],{"title":8,"total":13},39,{"title":15,"total":16},"Google",44,{"title":18,"total":19},"Nvidia",13,{"title":21,"total":22},"Claude",11,{"title":9,"total":24},35,{"title":10,"total":26},85,{"title":28,"total":29},"DeepSeek",9,{"title":31,"total":32},"OCR",1,{"title":34,"total":35},"Chat",7,{"title":37,"total":38},"Generator",116,[40,48,55,64,71],{"id":41,"publish_date":42,"is_original":4,"collection":5,"cover_url":43,"cover_url_1_1":44,"title":45,"summary":46,"author":47},557,"2022-04-29","article_res/cover/7a9b1375ed9bb298154981bae42b794d.jpeg","article_res/cover/afa281dd52bc0454e6735daa8e6b0706.jpeg","Translation and summary of Messari Report [2.8 Kristin Smith, Blockchain Association and Katie Haun, a16z]","We need unity and speed right now.","Translation",{"id":49,"publish_date":50,"is_original":4,"collection":5,"cover_url":51,"cover_url_1_1":52,"title":53,"summary":54,"author":47},531,"2022-05-25","article_res/cover/e8362057f8fa189594c60afdfaaeb6e5.jpeg","article_res/cover/8ea08d0d6fa7eee6b57ed4ec61b61ad6.jpeg","Decentralized Society: Finding Web3’s Soul / Decentralized Society: Finding the Soul of Web3 -7","Decentralization through Pluralism When analyzing ecosystems, it's desirable to measure how decentralized it is.",{"id":56,"publish_date":57,"is_original":32,"collection":58,"cover_url":59,"cover_url_1_1":60,"title":61,"summary":62,"author":63},127,"2024-11-14","#Google #AI Game #World Model #AI Story","article_res/cover/0233a875b7ec2debf59779e311547569.jpeg","article_res/cover/6ffddb6ae4914b3c699493311aa9f198.jpeg","Google Launches \"Unbounded\": A Generative Infinite Character Life Simulation Game","Unbounded: A Generative Infinite Game of Character Life Simulation","Renee's Entrepreneurial Journey",{"id":13,"publish_date":65,"is_original":32,"collection":66,"cover_url":67,"cover_url_1_1":68,"title":69,"summary":70,"author":63},"2025-02-14","#Deep Dive into LLMs #Andrej Karpathy #LLM #Tool Use #Hallucination","article_res/cover/11e858ad6b74dfa80f923d549b62855c.jpeg","article_res/cover/615e1b320f1fc163edc1d2d154a6de33.jpeg","Andrej Karpathy's in-depth explanation of LLM (Part 4): Hallucinations","hallucinations, tool use, knowledge/working memory",{"id":72,"publish_date":73,"is_original":4,"collection":5,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":47},579,"2022-04-07","article_res/cover/39387376ba28447af1eb40576b9df215.jpeg","article_res/cover/02727ede8551ed49901d0abe6d6305b7.jpeg","Messari Report Translation and Summary 【1-7 Surviving the Winter】","I’d be more cautious here: 10 year and 10 hour thinking only.",[79,87,95,103,111],{"id":80,"publish_date":81,"is_original":32,"collection":82,"cover_url":83,"cover_url_1_1":84,"title":85,"summary":86,"author":63},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":88,"publish_date":89,"is_original":32,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":63},626,"2025-03-21","#Deep 
Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":96,"publish_date":97,"is_original":32,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":63},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":104,"publish_date":105,"is_original":32,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":63},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":112,"publish_date":105,"is_original":32,"collection":113,"cover_url":114,"cover_url_1_1":115,"title":116,"summary":117,"author":63},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[119,127,135],{"id":120,"publish_date":121,"is_original":32,"collection":122,"cover_url":123,"cover_url_1_1":124,"title":125,"summary":126,"author":63},160,"2024-10-04","#Philosophy","article_res/cover/496990c49211e8b7f996b7d39c18168e.jpeg","article_res/cover/14dbaa1ade9cb4316d5829423a900362.jpeg","Time","The fungus of the morning does not know the waxing and waning of the moon, and the cicada does not know the seasons; this is a short life. To the south of the state of Chu there is a dark spirit which regards five hundred years as spring and five hundred years as autumn. 
In ancient times there was a great tree called the Ming which regarded eight thousand years as spring and eight thousand years as autumn; this is a long life.",{"id":128,"publish_date":129,"is_original":32,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":63},98,"2024-12-17","#AI Video Generator #Sora #Pika","article_res/cover/3b86e85d03fff4f356a3e4cf2bb329c9.jpeg","article_res/cover/5fa5c20ad0b40f8f544d257c0ef02938.jpeg","Pika 2.0 video generation officially released: effect comparison with Sora","今天，我们推出了Pika 2.0模型。卓越的文字对齐效果。惊人的视觉表现。还有✨场景成分✨",{"id":136,"publish_date":137,"is_original":32,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":63},71,"2025-01-14","#Nvidia #World Foundation Model #Cosmos #Physical AI #Embodied AI","article_res/cover/feddf8c832dfb45d28804291f6a42a9e.jpeg","article_res/cover/d6bc2f1186d96b78228c2283a17a3645.jpeg","NVIDIA's Cosmos World Model","Cosmos World Foundation Model Platform for Physical AI",[144,163,188],{"title":8,"items":145},[146,147,155],{"id":104,"publish_date":105,"is_original":32,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":63},{"id":148,"publish_date":149,"is_original":32,"collection":150,"cover_url":151,"cover_url_1_1":152,"title":153,"summary":154,"author":63},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":156,"publish_date":157,"is_original":32,"collection":158,"cover_url":159,"cover_url_1_1":160,"title":161,"summary":162,"author":63},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"title":9,"items":164},[165,172,180],{"id":166,"publish_date":157,"is_original":32,"collection":167,"cover_url":168,"cover_url_1_1":169,"title":170,"summary":171,"author":63},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":173,"publish_date":174,"is_original":4,"collection":175,"cover_url":176,"cover_url_1_1":177,"title":178,"summary":179,"author":63},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  
# Google has released TITANS, the successor to the Transformer architecture

2025-01-15 · #Google #LLM · Renee's Entrepreneurial Journey

*Titans: Learning to Memorize at Test Time*

First, the phrase "successor to the Transformer architecture" in the title 👆🏻 is something I saw someone use on x.com.
I'm not in a position to judge how important TITANS really is; I'm just learning about it here.

The attention mechanism has been key to the progress of most large language models (LLMs), but it does not scale to long contexts.

> "The true art of memory is the art of attention!" — Samuel Johnson, 1787

TITANS is a new architecture that combines attention with a meta in-context memory module, allowing the model to learn how to memorize at test time. Compared with Transformers and modern linear RNNs, TITANS performs better and scales effectively to context windows beyond 2M tokens, surpassing even very large models such as GPT-4 and Llama3-70B.

Paper: https://arxiv.org/pdf/2501.00663v1

In summary, the TITANS architecture is an innovative attempt to address the long-context problem and improve memory capabilities.
Compared with the traditional Transformer architecture, TITANS maintains efficient performance over a much larger context window and can memorize dynamically at test time, which shows its potential.

## How to design long-term memory?

The TITANS team approached this question from the perspective of human memory. Human short-term memory is very accurate but has a limited window (roughly 30 seconds). So how do we handle longer contexts? The TITANS team turned to other types of memory systems to store potentially useful information.

The team argues that the attention mechanism, with its limited context window but accurate dependency modeling, plays the role of short-term memory.
Therefore, TITANS needs a neural memory module that can memorize a much longer history, acting as a long-term, more persistent memory.

There is a catch, though: the memory system is responsible for storing information, but memorizing the training data may be useless at test time, because the test data distribution can differ from the training distribution. The memory module therefore has to be taught how to memorize/forget information at test time.

To this end, the TITANS team proposes encoding past history into the parameters of a neural network (similar to TTT) and training an online meta-model that learns how to memorize/forget data at test time.
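To make the idea concrete, here is a minimal, hypothetical sketch (not the paper's implementation) of a memory that "memorizes at test time": a small MLP whose parameters are updated by a gradient step on an associative key→value loss for every incoming token, even during inference. All names here (`NeuralMemory`, `d_model`, the plain-SGD write rule) are illustrative assumptions.

```python
import torch
import torch.nn as nn

class NeuralMemory(nn.Module):
    """Hypothetical sketch of a test-time-trained memory module."""

    def __init__(self, d_model: int, lr: float = 1e-2):
        super().__init__()
        # The "memory" is simply the parameters of a small MLP.
        self.mem = nn.Sequential(
            nn.Linear(d_model, d_model), nn.SiLU(), nn.Linear(d_model, d_model)
        )
        self.to_k = nn.Linear(d_model, d_model, bias=False)  # key projection
        self.to_v = nn.Linear(d_model, d_model, bias=False)  # value projection
        self.lr = lr

    def write(self, x: torch.Tensor) -> None:
        """Memorize token(s) x by one gradient step on ||mem(k) - v||^2."""
        k, v = self.to_k(x), self.to_v(x)
        loss = (self.mem(k) - v).pow(2).mean()        # associative-memory loss
        grads = torch.autograd.grad(loss, list(self.mem.parameters()))
        with torch.no_grad():                          # plain SGD step as the "write"
            for p, g in zip(self.mem.parameters(), grads):
                p -= self.lr * g

    def read(self, x: torch.Tensor) -> torch.Tensor:
        """Retrieve a memory for query x without modifying the parameters."""
        return self.mem(self.to_k(x))
```

The point is only that the "write" is a gradient update performed during inference; the paper's actual loss, optimizer, and parameterization are described next.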
## Which tokens need to be remembered?

The TITANS team again looked to human memory. The brain prioritizes events that defy expectations, i.e., surprising events. However, an event that is surprising at one moment does not keep surprising us over a long period; the initial surprising moment is enough to draw our attention and make us remember the whole time frame.

The team mimics this process to train the long-term memory, splitting the surprise of a token into:

1. instantaneous surprise
2. (decaying) past surprise

Instantaneous surprise is measured by the gradient of the memory's loss on the incoming token (taken with respect to the memory's parameters), while past surprise is a decaying cumulative value over past tokens.

[Figure from the paper]
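In symbols, my reading of this decomposition is roughly as follows (the symbols are assumptions for illustration: $M_{t-1}$ is the current memory, $\ell$ its associative loss on token $x_t$, $\theta_t$ a data-dependent step size, $\eta_t$ a data-dependent decay):

$$
S_t \;=\; \underbrace{\eta_t\, S_{t-1}}_{\text{decaying past surprise}} \;-\; \underbrace{\theta_t\, \nabla_{M}\,\ell(M_{t-1};\, x_t)}_{\text{instantaneous surprise}}
$$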
data-ratio=\"2.7142857142857144\" data-w=\"28\" src=\"https://res.cooltool.vip/article_res/assets/17423771797270.6140339382779112.png\">\u003C/section>\u003Csection style=\"display: flex;justify-content: center;align-items: center;align-self: flex-end;margin-right: 8px;margin-bottom: 5px;z-index: 1;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 3px;height: 3px;background: #FFFFFF;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFDDDD;border-radius: 50%;margin-left: 5px;margin-right: 5px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFE730;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003C/section>\u003Csection style=\"width: 8px;height: 10px;align-self: flex-end;margin-right: -12.1px;margin-bottom: -20.1px;display: flex;justify-content: center;align-items: center;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg data-imgfileid=\"100009173\" data-ratio=\"1.25\" data-w=\"16\" src=\"https://res.cooltool.vip/article_res/assets/17423771795820.07590436488943664.png\">\u003C/section>\u003Csection style=\"text-align: left;padding: 6px 14px 7px 15px;border-width: 7px 1px 1px;border-style: solid;border-color: rgb(0, 0, 0);\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cp style=\"font-size: 16px;font-family: PingFangSC-Semibold, PingFang SC;font-weight: bold;color: #333333;line-height: 22px;letter-spacing: 1px;\" data-mid=\"\">How is memory forgotten?\u003C/p>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Cp style='margin-bottom: 0px;;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>in the memory update rule. 
Interestingly, this weight decay can be seen as a generalized form of the data-dependent gating in RNNs, applied to matrix- or vector-valued memory.

[Figure from the paper]
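Putting the forgetting together with the surprise term above, my reading of the full update rule is roughly:

$$
M_t \;=\; (1 - \alpha_t)\, M_{t-1} \;+\; S_t, \qquad \alpha_t \in [0, 1]
$$

where $\alpha_t$ is the data-dependent weight decay acting as a forget gate: $\alpha_t \to 1$ wipes the memory, $\alpha_t \to 0$ preserves it.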
## Is this design parallelizable?

Yes. The team computes the memory updates over mini-batches (chunks) of tokens: the gradients within a chunk can be evaluated jointly as matrix multiplications and combined with the weight decay through additional matrix multiplications. As for the decaying past surprise, the team realized that it can be computed with a parallel scan within each mini-batch.

[Figure from the paper]
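A minimal sketch of why the scan works, assuming the recurrence has the shape $s_t = \eta_t s_{t-1} + u_t$: the per-token update is an associative operation, so the prefix states inside a chunk can be produced by a parallel prefix scan instead of a strictly sequential loop. Below are a sequential reference fold and the associative combine operator it relies on (function names are mine, not the paper's).

```python
import numpy as np

def combine(left, right):
    """Associative operator for the recurrence s_t = eta_t * s_{t-1} + u_t.

    Elements are pairs (eta, u). Folding a chunk of pairs with this operator
    yields (product of etas, final state s); because the operator is
    associative, the fold can be evaluated as a parallel prefix scan.
    """
    (e1, u1), (e2, u2) = left, right
    return (e1 * e2, e2 * u1 + u2)

def decayed_cumsum(eta, u):
    """Sequential reference: all prefix states s_1..s_T of the recurrence."""
    s, out = 0.0, []
    for e_t, u_t in zip(eta, u):
        s = e_t * s + u_t
        out.append(s)
    return np.array(out)

# Tiny check that the combine operator reproduces the sequential result.
eta = np.array([0.9, 0.8, 0.7])
u = np.array([1.0, 2.0, 3.0])
acc = (1.0, 0.0)
for pair in zip(eta, u):
    acc = combine(acc, pair)
assert np.isclose(acc[1], decayed_cumsum(eta, u)[-1])
```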
## How to integrate memory?

The TITANS team demonstrated three architectural variants, in which the memory can serve as:

1. context
2. gate
3. layer

[Architecture diagrams from the paper]

In the memory-as-context variant, the input is divided into segments (which can be large, even as large as the context window of current attention-based LLMs); past memory states are used to retrieve the corresponding memories, which are then updated with the attention output, as sketched below.
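A rough, hypothetical sketch of that flow, reusing the `NeuralMemory` sketch from earlier (the real memory-as-context block also involves persistent memory tokens and other details I'm omitting; `nn.MultiheadAttention` merely stands in for the attention block):

```python
import torch
import torch.nn as nn

def memory_as_context(x: torch.Tensor, memory: "NeuralMemory",
                      attn: nn.MultiheadAttention, seg_len: int) -> torch.Tensor:
    """x: (seq_len, d_model). Process the sequence one segment at a time."""
    outputs = []
    for seg in x.split(seg_len, dim=0):
        mem_tokens = memory.read(seg)              # retrieve long-term memories
        ctx = torch.cat([mem_tokens, seg], dim=0)  # memory tokens act as extra context
        out, _ = attn(ctx.unsqueeze(1), ctx.unsqueeze(1), ctx.unsqueeze(1))
        out = out.squeeze(1)[-seg.size(0):]        # keep the segment's own positions
        memory.write(out)                          # update memory with attention output
        outputs.append(out)
    return torch.cat(outputs, dim=0)
```

For example, `memory_as_context(tokens, NeuralMemory(64), nn.MultiheadAttention(64, 4), seg_len=128)` would process a `(seq_len, 64)` sequence segment by segment.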
## TITANS' performance in experiments

The TITANS team evaluated language modeling, commonsense reasoning, "needle in a haystack" retrieval, and time-series forecasting tasks.

[Benchmark tables from the paper]

On the long-context tasks, TITANS even outperformed far larger models such as GPT-4 and Llama3-70B.

[Figure from the paper]
data-ratio=\"2.7142857142857144\" data-w=\"28\" src=\"https://res.cooltool.vip/article_res/assets/17423771797470.4529312295553136.png\">\u003C/section>\u003Csection style=\"display: flex;justify-content: center;align-items: center;align-self: flex-end;margin-right: 8px;margin-bottom: 5px;z-index: 1;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 3px;height: 3px;background: #FFFFFF;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFDDDD;border-radius: 50%;margin-left: 5px;margin-right: 5px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFE730;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003C/section>\u003Csection style=\"width: 8px;height: 10px;align-self: flex-end;margin-right: -12.1px;margin-bottom: -20.1px;display: flex;justify-content: center;align-items: center;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg data-imgfileid=\"100009180\" data-ratio=\"1.25\" data-w=\"16\" src=\"https://res.cooltool.vip/article_res/assets/17423771797360.3545994181030392.png\">\u003C/section>\u003Csection style=\"text-align: left;padding: 6px 14px 7px 15px;border-width: 7px 1px 1px;border-style: solid;border-color: rgb(0, 0, 0);\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cp style=\"font-size: 16px;font-family: PingFangSC-Semibold, PingFang SC;font-weight: bold;color: #333333;line-height: 22px;letter-spacing: 1px;\" data-mid=\"\">Summary\u003C/p>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Cp style='margin-bottom: 0px;;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The TITANS architecture demonstrates how to solve the problem of long contexts by combining a dynamic memory module with the attention mechanism. It far exceeds existing Transformer and RNN architectures in performance (according to the authors) and, through diverse memory mechanisms, can handle different tasks, showcasing its advantages in processing large context windows.\u003C/p>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",1752585424225]