[{"data":1,"prerenderedAt":205},["ShallowReactive",2],{"DlFXI4Eibt_Bn9lrEZz1TYbHCWFZj3IvqwHQSEW-Exc":3,"Yj_za7r7QaeuyNotMOhSrymdYpzNwsiygnGOBVj9n24":194},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"hot":39,"new":78,"banner":118,"data":143,"cache":193},[8,9,10],"Agent","OpenAI","LLM",[12,14,17,20,23,25,27,30,33,36],{"title":8,"total":13},39,{"title":15,"total":16},"Google",44,{"title":18,"total":19},"Nvidia",13,{"title":21,"total":22},"Claude",11,{"title":9,"total":24},35,{"title":10,"total":26},85,{"title":28,"total":29},"DeepSeek",9,{"title":31,"total":32},"OCR",1,{"title":34,"total":35},"Chat",7,{"title":37,"total":38},"Generator",116,[40,48,55,64,71],{"id":41,"publish_date":42,"is_original":4,"collection":5,"cover_url":43,"cover_url_1_1":44,"title":45,"summary":46,"author":47},557,"2022-04-29","article_res/cover/7a9b1375ed9bb298154981bae42b794d.jpeg","article_res/cover/afa281dd52bc0454e6735daa8e6b0706.jpeg","Translation and summary of Messari Report [2.8 Kristin Smith, Blockchain Association and Katie Haun, a16z]","We need unity and speed right now.","Translation",{"id":49,"publish_date":50,"is_original":4,"collection":5,"cover_url":51,"cover_url_1_1":52,"title":53,"summary":54,"author":47},531,"2022-05-25","article_res/cover/e8362057f8fa189594c60afdfaaeb6e5.jpeg","article_res/cover/8ea08d0d6fa7eee6b57ed4ec61b61ad6.jpeg","Decentralized Society: Finding Web3’s Soul / Decentralized Society: Finding the Soul of Web3 -7","Decentralization through Pluralism When analyzing ecosystems, it's desirable to measure how decentralized it is.",{"id":56,"publish_date":57,"is_original":32,"collection":58,"cover_url":59,"cover_url_1_1":60,"title":61,"summary":62,"author":63},127,"2024-11-14","#Google #AI Game #World Model #AI Story","article_res/cover/0233a875b7ec2debf59779e311547569.jpeg","article_res/cover/6ffddb6ae4914b3c699493311aa9f198.jpeg","Google Launches \"Unbounded\": A Generative Infinite Character Life Simulation Game","Unbounded: A Generative Infinite 
Game of Character Life Simulation","Renee's Entrepreneurial Journey",{"id":13,"publish_date":65,"is_original":32,"collection":66,"cover_url":67,"cover_url_1_1":68,"title":69,"summary":70,"author":63},"2025-02-14","#Deep Dive into LLMs #Andrej Karpathy #LLM #Tool Use #Hallucination","article_res/cover/11e858ad6b74dfa80f923d549b62855c.jpeg","article_res/cover/615e1b320f1fc163edc1d2d154a6de33.jpeg","Andrej Karpathy's in-depth explanation of LLM (Part 4): Hallucinations","hallucinations, tool use, knowledge/working memory",{"id":72,"publish_date":73,"is_original":4,"collection":5,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":47},579,"2022-04-07","article_res/cover/39387376ba28447af1eb40576b9df215.jpeg","article_res/cover/02727ede8551ed49901d0abe6d6305b7.jpeg","Messari Report Translation and Summary 【1-7 Surviving the Winter】","I’d be more cautious here: 10 year and 10 hour thinking only.",[79,87,95,103,111],{"id":80,"publish_date":81,"is_original":32,"collection":82,"cover_url":83,"cover_url_1_1":84,"title":85,"summary":86,"author":63},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":88,"publish_date":89,"is_original":32,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":63},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human 
capabilities.",{"id":96,"publish_date":97,"is_original":32,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":63},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":104,"publish_date":105,"is_original":32,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":63},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":112,"publish_date":105,"is_original":32,"collection":113,"cover_url":114,"cover_url_1_1":115,"title":116,"summary":117,"author":63},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[119,127,135],{"id":120,"publish_date":121,"is_original":32,"collection":122,"cover_url":123,"cover_url_1_1":124,"title":125,"summary":126,"author":63},160,"2024-10-04","#Philosophy","article_res/cover/496990c49211e8b7f996b7d39c18168e.jpeg","article_res/cover/14dbaa1ade9cb4316d5829423a900362.jpeg","Time","The fungus of the morning does not know the waxing and waning of the moon, and the cicada does not know the seasons; this is a short life. To the south of the state of Chu there is a dark spirit which regards five hundred years as spring and five hundred years as autumn. 
In ancient times there was a great tree called the Ming which regarded eight thousand years as spring and eight thousand years as autumn; this is a long life.",{"id":128,"publish_date":129,"is_original":32,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":63},98,"2024-12-17","#AI Video Generator #Sora #Pika","article_res/cover/3b86e85d03fff4f356a3e4cf2bb329c9.jpeg","article_res/cover/5fa5c20ad0b40f8f544d257c0ef02938.jpeg","Pika 2.0 video generation officially released: effect comparison with Sora","今天，我们推出了Pika 2.0模型。卓越的文字对齐效果。惊人的视觉表现。还有✨场景成分✨",{"id":136,"publish_date":137,"is_original":32,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":63},71,"2025-01-14","#Nvidia #World Foundation Model #Cosmos #Physical AI #Embodied AI","article_res/cover/feddf8c832dfb45d28804291f6a42a9e.jpeg","article_res/cover/d6bc2f1186d96b78228c2283a17a3645.jpeg","NVIDIA's Cosmos World Model","Cosmos World Foundation Model Platform for Physical AI",[144,163,188],{"title":8,"items":145},[146,147,155],{"id":104,"publish_date":105,"is_original":32,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":63},{"id":148,"publish_date":149,"is_original":32,"collection":150,"cover_url":151,"cover_url_1_1":152,"title":153,"summary":154,"author":63},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":156,"publish_date":157,"is_original":32,"collection":158,"cover_url":159,"cover_url_1_1":160,"title":161,"summary":162,"author":63},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the 
competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"title":9,"items":164},[165,172,180],{"id":166,"publish_date":157,"is_original":32,"collection":167,"cover_url":168,"cover_url_1_1":169,"title":170,"summary":171,"author":63},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":173,"publish_date":174,"is_original":4,"collection":175,"cover_url":176,"cover_url_1_1":177,"title":178,"summary":179,"author":63},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  
\n - OpenAI",{"id":181,"publish_date":182,"is_original":4,"collection":183,"cover_url":184,"cover_url_1_1":185,"title":186,"summary":187,"author":63},417,"2023-08-24","#OpenAI","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"title":10,"items":189},[190,191,192],{"id":88,"publish_date":89,"is_original":32,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":63},{"id":96,"publish_date":97,"is_original":32,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":63},{"id":104,"publish_date":105,"is_original":32,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":63},true,{"code":4,"msg":5,"data":195},{"id":196,"publish_date":197,"is_original":32,"collection":198,"articles_id":199,"cover_url":200,"cover_url_1_1":201,"title":202,"summary":203,"author":63,"content":204},41,"2025-02-12","#Deep Dive into LLMs #Andrej Karpathy #GPT-2 #LLama #LLM","3vRrRwkHUUmSNyi627wXfQ","article_res/cover/b4bb4f98373186a3310457dc2198da22.jpeg","article_res/cover/2a7698249de29b6392907c437a9ab149.jpeg","Andrej Karpathy Deep Dive on LLM (Part 2): Understanding Training and Inference through GPT-2 and Llama 3.1","GPT-2: training and inference  \nLlama 3.1 base model inference","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp style='margin-bottom: 0px;;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>), today we will look at two examples.\u003C/p>\u003Cp style='margin-bottom: 
0px;;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>\u003Cbr>\u003C/p>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 100%;padding: 0px 12px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 100%;display: flex;align-items: center;justify-content: space-between;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;align-items: center;width: 100%;height: 5px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"flex-shrink: 0;width: 6px;height: 6px;background: #4499e7;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr mpa-from-tpl=\"t\">\u003C/section>\u003Csection style=\"width: 100%;border-top: 1px solid #4499e7;height: 1px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr mpa-from-tpl=\"t\">\u003C/section>\u003C/section>\u003Csection style=\"flex-shrink: 0;min-width: 79px;text-align: center;background: #4499e7;padding: 5px 15px 4px 15px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cp style=\"color: #ffffff;font-size: 16px;line-height: 17px;word-break: break-word;\" data-mid=\"\">GPT-2\u003C/p>\u003C/section>\u003Csection style=\"display: flex;align-items: center;width: 100%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 100%;border-top: 1px solid #4499e7;height: 1px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr mpa-from-tpl=\"t\">\u003C/section>\u003Csection style=\"flex-shrink: 0;width: 6px;height: 6px;background: #4499e7;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr mpa-from-tpl=\"t\">\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: 
rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">GPT-2: Training and Inference\u003C/strong>\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>. Its core technology is still in use today, just with a significant increase in scale and computational power.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010105\" data-ratio=\"0.7490740740740741\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770900780.6247644945001574.png\">\u003C/p>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">Basic parameters of GPT-2\u003C/strong>\u003C/span>\u003C/h3>\u003Cul style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: 
normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, GPT-2 is relatively small.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, while the context window of the latest models has been expanded to\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, it is much smaller.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">Andrej himself replicated a GPT-2: https://github.com/karpathy/llm.c/discussions/677\u003C/section>\u003C/li>\u003C/ul>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">GPT-2 Training Process\u003C/strong>\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The essence of GPT-2 training is\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img 
js_insertlocalimg\" data-imgfileid=\"100010119\" data-ratio=\"0.4166666666666667\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770901060.2746802975958378.png\">\u003C/p>\u003Col style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, the initial output is completely random.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, improve the prediction of tokens in 1 million training data, calculate the error (Loss) of the current prediction.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, improve the accuracy of the next token prediction.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, each time updating the model's weights to gradually enhance its predictive ability. 
Each step takes about 7 seconds.\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010120\" data-ratio=\"0.49537037037037035\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770900780.5173050587231816.png\">\u003C/p>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">\u003Cstrong style=\";color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">As training progresses, the generated text evolves from random characters to coherent and readable content.\u003C/strong>。\u003C/section>\u003C/li>\u003C/ol>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">Training Cost\u003C/strong>\u003C/span>\u003C/h3>\u003Cul style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">The estimated cost 
of training GPT-2 in 2019 was\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, the cost of training a model of the same scale today may have decreased to\u003C/section>\u003C/li>\u003C/ul>\u003Cp style='margin-bottom: 0px;;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The main reasons for the decrease in training costs:\u003C/p>\u003Col style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, reducing useless data and improving training efficiency.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, such as a significant increase in GPU computing power, optimizing training speed.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, such as more efficient training frameworks, allowing the same computing resources to accomplish more tasks.\u003C/section>\u003C/li>\u003C/ol>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 
1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">Training Progress\u003C/strong>\u003C/span>\u003C/h3>\u003Cul style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">In the early stages of training, the generated text is\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010107\" data-ratio=\"0.23703703703703705\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770900750.3885157090724545.png\">\u003C/p>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, but still lacks overall logic.\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010106\" data-ratio=\"0.2\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770900760.2548272103576923.png\">\u003C/p>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, with significantly improved accuracy in predicting the next token.\u003C/section>\u003C/li>\u003C/ul>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 
0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">Computing Resources and GPUs\u003C/strong>\u003C/span>\u003C/h3>\u003Cul style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, which is difficult for personal computers to achieve.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">Modern AI training relies on cloud-based GPU clusters, for example, Andrej's own replication of GPT-2 used\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010100\" data-ratio=\"0.4675925925925926\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770901010.8735487300800044.png\">\u003C/p>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">\".\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010101\" data-ratio=\"0.4675925925925926\" 
data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770901750.6158730316126126.png\">\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cbr>\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-croporisrc=\"https://mmbiz.qpic.cn/sz_mmbiz_png/YdtkbCEBMDG4wLPhVskf1VPNjoiaKVdX210p6f2pp8XDroodAtJDcU2iaXzrbLrgR7SFRu1ANuX79lKABiaEpEXIA/0?wx_fmt=png&amp;from=appmsg\" data-cropx1=\"9.27536231884058\" data-cropx2=\"1280\" data-cropy1=\"0\" data-cropy2=\"962.31884057971\" data-imgfileid=\"100010102\" data-ratio=\"0.7574074074074074\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"width: 548px;height: 415px;\" src=\"https://res.cooltool.vip/article_res/assets/17423770907530.4226726972211483.jpeg\">\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cbr>\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010104\" data-ratio=\"1.0194444444444444\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770906840.4224595279100163.png\">\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cbr>\u003C/p>\u003C/li>\u003C/ul>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 100%;padding: 0px 12px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 100%;display: flex;align-items: center;justify-content: space-between;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;align-items: center;width: 100%;height: 5px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"flex-shrink: 0;width: 6px;height: 6px;background: #4499e7;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr mpa-from-tpl=\"t\">\u003C/section>\u003Csection style=\"width: 100%;border-top: 1px solid #4499e7;height: 1px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr 
mpa-from-tpl=\"t\">\u003C/section>\u003C/section>\u003Csection style=\"flex-shrink: 0;min-width: 79px;text-align: center;background: #4499e7;padding: 5px 15px 4px 15px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cp style=\"color: #ffffff;font-size: 16px;line-height: 17px;word-break: break-word;\" data-mid=\"\">Llama 3.1\u003C/p>\u003C/section>\u003Csection style=\"display: flex;align-items: center;width: 100%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 100%;border-top: 1px solid #4499e7;height: 1px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr mpa-from-tpl=\"t\">\u003C/section>\u003Csection style=\"flex-shrink: 0;width: 6px;height: 6px;background: #4499e7;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr mpa-from-tpl=\"t\">\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">Llama 3.1 Base Model Inference\u003C/strong>\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>because they do not engage in dialogue or execute instructions.\u003C/p>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, 
serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">How Base Models Work\u003C/strong>\u003C/span>\u003C/h3>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010108\" data-ratio=\"0.5212962962962963\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770913240.7543267740438659.png\">\u003C/p>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003C/strong>\u003C/span>\u003Cul style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">that generates token sequences based on the statistical patterns of training data, rather than an interactive assistant.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">; it merely selects tokens based on probability, similar 
to\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, but typically requires further fine-tuning for practical applications to become useful assistant models (Assistant Models).\u003C/section>\u003C/li>\u003C/ul>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">Llama 3.1 Base Model\u003C/strong>\u003C/span>\u003C/h3>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010109\" data-ratio=\"0.40370370370370373\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770900790.7527442999078047.png\">\u003C/p>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003C/strong>\u003C/span>\u003Cul style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection 
style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, it is one of the most advanced open-source base models currently available.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, the scale has significantly increased.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">far exceeding GPT-2's\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">which can be used as an assistant model.\u003C/section>\u003C/li>\u003C/ul>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">Base Model Inference Examples\u003C/strong>\u003C/span>\u003C/h3>\u003Ch4 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 18px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 
3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">1. Direct Use of Base Model\u003C/strong>\u003C/span>\u003C/h4>\u003Cp style='margin-bottom: 0px;;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>You can use https://app.hyperbolic.xyz/ to use the Base Model. When using the Base Model, input:\u003C/p>\u003Cpre style='font-size: 16px;font-family: SFMono-Regular, Consolas, \"Liberation Mono\", Menlo, Courier, monospace;margin-top: 10px;margin-bottom: 10px;overflow: auto;;border-radius: 5px;box-shadow: rgba(0, 0, 0, 0.55) 0px 2px 10px;text-align: left;color: rgb(0, 0, 0);letter-spacing: normal;background-color: rgb(255, 255, 255);'>\u003Ccode style=\"font-family: Consolas, Monaco, Menlo, monospace;font-size: 12px;display: -webkit-box;overflow-x: auto;padding: 15px 16px 16px;color: rgb(171, 178, 191);background: rgb(40, 44, 52);;border-radius: 5px;\">\u003Cspan style=\"color: rgb(152, 195, 121);;line-height: 26px;\">\"What is 2 + 2?\"\u003C/span>\u003Cbr style=\";\">\u003C/code>\u003C/pre>\u003Cul style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">because it is not an assistant model.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">It only predicts the next most likely token based on the statistical patterns in the training data, possibly outputting random content, such 
as:\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">rather than truly understanding the question.\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010110\" data-ratio=\"0.46296296296296297\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770908000.19406273296784748.png\">\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cbr>\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010111\" data-ratio=\"0.4361111111111111\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770902570.5690838877921514.png\">\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cbr>\u003C/p>\u003C/li>\u003C/ul>\u003Cp style=\"text-align: left;\">\u003Cspan style='color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>Although it does not answer your questions like an assistant, this Base Model is still very valuable. 
Because it has learned a lot of information about the world and stores the knowledge of the web in its parameters, it is a distillation of web information.\u003C/span>\u003C/p>\u003Ch4 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 18px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">2. Generating Knowledge-Based Text\u003C/strong>\u003C/span>\u003C/h4>\u003Cp style='margin-bottom: 0px;;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>If we input:\u003C/p>\u003Cpre style='font-size: 16px;font-family: SFMono-Regular, Consolas, \"Liberation Mono\", Menlo, Courier, monospace;margin-top: 10px;margin-bottom: 10px;overflow: auto;;border-radius: 5px;box-shadow: rgba(0, 0, 0, 0.55) 0px 2px 10px;text-align: left;color: rgb(0, 0, 0);letter-spacing: normal;background-color: rgb(255, 255, 255);'>\u003Ccode style=\"font-family: Consolas, Monaco, Menlo, monospace;font-size: 12px;display: -webkit-box;overflow-x: auto;padding: 15px 16px 16px;color: rgb(171, 178, 191);background: rgb(40, 44, 52);;border-radius: 5px;\">\u003Cspan style=\"color: rgb(152, 195, 121);;line-height: 26px;\">\"Here is a list of the top 10 landmarks in Paris:\"\u003C/span>\u003Cbr style=\";\">\u003C/code>\u003C/pre>\u003Cul style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", 
PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">The Base Model will automatically complete the list and generate possible landmark information.\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, and it is not a reliable source of facts.\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010113\" data-ratio=\"0.4361111111111111\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770916750.09021745979693896.png\">\u003C/p>\u003C/li>\u003C/ul>\u003Ch4 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 18px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">3. 
Memory and Generalization\u003C/strong>\u003C/span>\u003C/h4>\u003Cul style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">is called regurgitation of training data. The reason for this phenomenon is that the quality of information on Wikipedia is high, so the model may have seen the article 10 or even 100 times during training, thus memorizing it.\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010114\" data-ratio=\"0.38981481481481484\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770901040.8064321006282431.png\">\u003C/p>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, but it may be incorrect. 
This phenomenon is called hallucination.\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010116\" data-ratio=\"0.3351851851851852\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770901350.6693269179566057.png\">\u003C/p>\u003C/li>\u003C/ul>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\";font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">How to Turn a Base Model into an Assistant?\u003C/strong>\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>Although the Base Model is not an assistant, it can\u003C/p>\u003Col style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">\u003Cp style=\";color: rgb(0, 0, 0);line-height: 1.8em;letter-spacing: 0em;text-indent: 0em;padding-top: 8px;padding-bottom: 8px;\">\u003Cstrong style=\";background: none 0% 0% / 
auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">Using few-shot learning, which is\u003C/strong>\u003C/p>\u003C/section>\u003C/li>\u003C/ol>\u003Cul style=\"margin-top: 8px;margin-bottom: 8px;;list-style-type: disc;padding-left: 25px;color: rgb(0, 0, 0);\" class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">For example, providing multiple words and their Korean translations, allowing the model to learn the pattern automatically:\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">The Base Model may correctly complete\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010124\" data-ratio=\"0.4166666666666667\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770900920.8300929975244324.png\">\u003C/p>\u003C/li>\u003C/ul>\u003Col start=\"2\" style='margin-top: 8px;margin-bottom: 8px;;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">\u003Cp style=\";color: rgb(0, 0, 0);line-height: 1.8em;letter-spacing: 0em;text-indent: 0em;padding-top: 8px;padding-bottom: 8px;\">\u003Cstrong style=\";background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">pretend to be a conversation\u003C/strong>\u003C/p>\u003C/section>\u003C/li>\u003C/ol>\u003Cul style=\"margin-top: 8px;margin-bottom: 8px;;list-style-type: disc;padding-left: 25px;color: rgb(0, 0, 0);\" class=\"list-paddingleft-1\">\u003Cli style=\";\">\u003Csection 
style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">, guide the Base Model to act as an assistant:\u003C/section>\u003C/li>\u003Cli style=\";\">\u003Csection style=\";margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">In this way, the Base Model will continue in this format, appearing to provide answers.\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010117\" data-ratio=\"0.39814814814814814\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770900800.8386829581250799.png\">\u003C/p>\u003C/li>\u003Cli style=\";\">\u003Cp style=\"text-align: left;\">\u003Cspan style='color: rgb(1, 1, 1);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>However, sometimes in addition to answering, the Base Model will also \u003C/span>\u003Cspan style='color: rgb(1, 1, 1);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>hallucinate the next human question.\u003C/span>\u003Cbr>\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010118\" data-ratio=\"0.7638888888888888\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"https://res.cooltool.vip/article_res/assets/17423770920700.6034130214154587.png\">\u003C/p>\u003C/li>\u003C/ul>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",1752585428230]