<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"><channel><title>NLP Arxiv Daily</title><description>Daily-refreshed NLP arxiv paper digest</description><link>https://monologg.kr/nlp-arxiv-daily/</link><item><title>The Scientific Contribution Graph: Automated Literature-based Technological Roadmapping at Scale</title><link>http://arxiv.org/abs/2605.15011v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15011v1</guid><description>Peter A. Jansen et al. — arxiv:2605.15011 — NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Conversion of Lexicon-Grammar tables to LMF. Application to French</title><link>http://arxiv.org/abs/2605.14816v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14816v1</guid><description>Eric Laporte et al. — arxiv:2605.14816 — NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Graphs of Research: Citation Evolution Graphs as Supervision for Research Idea Generation</title><link>http://arxiv.org/abs/2605.14790v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14790v1</guid><description>Songyang Gao et al. — arxiv:2605.14790 — NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Are Candidate Models Really Needed for Active Learning?</title><link>http://arxiv.org/abs/2605.14689v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14689v1</guid><description>Harshini Mridula Mohan et al. — arxiv:2605.14689 — NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>SciPaths: Forecasting Pathways to Scientific Discovery</title><link>http://arxiv.org/abs/2605.14600v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14600v1</guid><description>Eric Chamoun et al. — arxiv:2605.14600 — NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>A Formative Study of Brief Affective Text as a Complement to Wearable Sensing for Longitudinal Student Health Monitoring</title><link>http://arxiv.org/abs/2605.14360v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14360v1</guid><description>Tamunotonye Harry et al. — arxiv:2605.14360 — NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>MetaMoE: Diversity-Aware Proxy Selection for Privacy-Preserving Mixture-of-Experts Unification</title><link>http://arxiv.org/abs/2605.14289v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14289v1</guid><description>Weisen Jiang et al. — arxiv:2605.14289 — NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>What Makes Words Hard? Sakura at BEA 2026 Shared Task on Vocabulary Difficulty Prediction</title><link>http://arxiv.org/abs/2605.14257v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14257v1</guid><description>Adam Nohejl et al. — arxiv:2605.14257 — NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Articraft: An Agentic System for Scalable Articulated 3D Asset Generation</title><link>http://arxiv.org/abs/2605.15187v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15187v1</guid><description>Matt Zhou et al. — arxiv:2605.15187 — LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Is Grep All You Need? How Agent Harnesses Reshape Agentic Search</title><link>http://arxiv.org/abs/2605.15184v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15184v1</guid><description>Sahil Sen et al. — arxiv:2605.15184 — LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>OpenDeepThink: Parallel Reasoning via Bradley--Terry Aggregation</title><link>http://arxiv.org/abs/2605.15177v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15177v1</guid><description>Shang Zhou et al. — arxiv:2605.15177 — LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>MetaBackdoor: Exploiting Positional Encoding as a Backdoor Attack Surface in LLMs</title><link>http://arxiv.org/abs/2605.15172v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15172v1</guid><description>Rui Wen et al. — arxiv:2605.15172 — LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Text Knows What, Tables Know When: Clinical Timeline Reconstruction via Retrieval-Augmented Multimodal Alignment</title><link>http://arxiv.org/abs/2605.15168v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15168v1</guid><description>Sayantan Kumar et al. — arxiv:2605.15168 — LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>MeMo: Memory as a Model</title><link>http://arxiv.org/abs/2605.15156v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15156v1</guid><description>Ryan Wei Heng Quek et al. — arxiv:2605.15156 — LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Self-Distilled Agentic Reinforcement Learning</title><link>http://arxiv.org/abs/2605.15155v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15155v1</guid><description>Zhengxi Lu et al. — arxiv:2605.15155 — LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Widening the Gap: Exploiting LLM Quantization via Outlier Injection</title><link>http://arxiv.org/abs/2605.15152v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15152v1</guid><description>Xiaohua Zhan et al. — arxiv:2605.15152 — LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>APWA: A Distributed Architecture for Parallelizable Agentic Workflows</title><link>http://arxiv.org/abs/2605.15132v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15132v1</guid><description>Evan Rose et al. — arxiv:2605.15132 — LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Talk is (Not) Cheap: A Taxonomy and Benchmark Coverage Audit for LLM Attacks</title><link>http://arxiv.org/abs/2605.15118v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15118v1</guid><description>Karthik Raghu Iyer et al. — arxiv:2605.15118 — LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>ATLAS: Agentic or Latent Visual Reasoning? One Word is Enough for Both</title><link>http://arxiv.org/abs/2605.15198v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15198v1</guid><description>Ziyu Guo et al. — arxiv:2605.15198 — LLM Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>FutureSim: Replaying World Events to Evaluate Adaptive Agents</title><link>http://arxiv.org/abs/2605.15188v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15188v1</guid><description>Shashwat Goel et al. — arxiv:2605.15188 — LLM Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Articraft: An Agentic System for Scalable Articulated 3D Asset Generation</title><link>http://arxiv.org/abs/2605.15187v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15187v1</guid><description>Matt Zhou et al. — arxiv:2605.15187 — LLM Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Is Grep All You Need? How Agent Harnesses Reshape Agentic Search</title><link>http://arxiv.org/abs/2605.15184v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15184v1</guid><description>Sahil Sen et al. — arxiv:2605.15184 — LLM Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>From Plans to Pixels: Learning to Plan and Orchestrate for Open-Ended Image Editing</title><link>http://arxiv.org/abs/2605.15181v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15181v1</guid><description>Anirudh Sundara Rajan et al. — arxiv:2605.15181 — LLM Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Position: Behavioural Assurance Cannot Verify the Safety Claims Governance Now Demands</title><link>http://arxiv.org/abs/2605.15164v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15164v1</guid><description>Pratinav Seth et al. — arxiv:2605.15164 — LLM Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Self-Distilled Agentic Reinforcement Learning</title><link>http://arxiv.org/abs/2605.15155v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15155v1</guid><description>Zhengxi Lu et al. — arxiv:2605.15155 — LLM Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Guises and Perspectives: An Intentional and Hyperintensional Sketch</title><link>http://arxiv.org/abs/2605.15144v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15144v1</guid><description>Juan J. Colomina-Alminana et al. — arxiv:2605.15144 — LLM Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>APWA: A Distributed Architecture for Parallelizable Agentic Workflows</title><link>http://arxiv.org/abs/2605.15132v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15132v1</guid><description>Evan Rose et al. — arxiv:2605.15132 — LLM Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>MemEye: A Visual-Centric Evaluation Framework for Multimodal Agent Memory</title><link>http://arxiv.org/abs/2605.15128v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15128v1</guid><description>Minghao Guo et al. — arxiv:2605.15128 — LLM Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>From Plans to Pixels: Learning to Plan and Orchestrate for Open-Ended Image Editing</title><link>http://arxiv.org/abs/2605.15181v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15181v1</guid><description>Anirudh Sundara Rajan et al. — arxiv:2605.15181 — Multi-Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Self-Distilled Agentic Reinforcement Learning</title><link>http://arxiv.org/abs/2605.15155v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15155v1</guid><description>Zhengxi Lu et al. — arxiv:2605.15155 — Multi-Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>APWA: A Distributed Architecture for Parallelizable Agentic Workflows</title><link>http://arxiv.org/abs/2605.15132v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15132v1</guid><description>Evan Rose et al. — arxiv:2605.15132 — Multi-Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Veritas: A Semantically Grounded Agentic Framework for Memory Corruption Vulnerability Detection in Binaries</title><link>http://arxiv.org/abs/2605.15097v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15097v1</guid><description>Xinran Zheng et al. — arxiv:2605.15097 — Multi-Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Orchard: An Open-Source Agentic Modeling Framework</title><link>http://arxiv.org/abs/2605.15040v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15040v1</guid><description>Baolin Peng et al. — arxiv:2605.15040 — Multi-Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>AI Knows When It&apos;s Being Watched: Functional Strategic Action and Contextual Register Modulation in Large Language Models</title><link>http://arxiv.org/abs/2605.15034v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15034v1</guid><description>Vinicius Covas et al. — arxiv:2605.15034 — Multi-Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Multi-Agentic Approach for History Matching of Oil Reservoirs</title><link>http://arxiv.org/abs/2605.15028v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15028v1</guid><description>Linar Samigullin et al. — arxiv:2605.15028 — Multi-Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>COTCAgent: Preventive Consultation via Probabilistic Chain-of-Thought Completion</title><link>http://arxiv.org/abs/2605.15016v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15016v1</guid><description>Zihan Deng et al. — arxiv:2605.15016 — Multi-Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>GraphFlow: An Architecture for Formally Verifiable Visual Workflows Enabling Reliable Agentic AI Automation</title><link>http://arxiv.org/abs/2605.14968v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14968v1</guid><description>Drewry H. Morris et al. — arxiv:2605.14968 — Multi-Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Chrono-Gymnasium: An Open-Source, Gymnasium-Compatible Distributed Simulation Framework</title><link>http://arxiv.org/abs/2605.14911v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14911v1</guid><description>Bocheng Zou et al. — arxiv:2605.14911 — Multi-Agent</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Is Grep All You Need? How Agent Harnesses Reshape Agentic Search</title><link>http://arxiv.org/abs/2605.15184v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15184v1</guid><description>Sahil Sen et al. — arxiv:2605.15184 — RAG</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Why Neighborhoods Matter: Traversal Context and Provenance in Agentic GraphRAG</title><link>http://arxiv.org/abs/2605.15109v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15109v1</guid><description>Riccardo Terrenzi et al. — arxiv:2605.15109 — RAG</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>From Scenes to Elements: Multi-Granularity Evidence Retrieval for Verifiable Multimodal RAG</title><link>http://arxiv.org/abs/2605.15019v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15019v1</guid><description>Guanhua Chen et al. — arxiv:2605.15019 — RAG</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Emotion-Attended Stateful Memory (EASM):The Architecture for Hyper-Personalization at Scale</title><link>http://arxiv.org/abs/2605.14833v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14833v1</guid><description>Vineet Kotecha et al. — arxiv:2605.14833 — RAG</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>AI-assisted cultural heritage dissemination: Comparing NMT and glossary-augmented LLM translation in rock art documents</title><link>http://arxiv.org/abs/2605.14679v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14679v1</guid><description>Vicent Briva-Iglesias et al. — arxiv:2605.14679 — RAG</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Falkor-IRAC: Graph-Constrained Generation for Verified Legal Reasoning in Indian Judicial AI</title><link>http://arxiv.org/abs/2605.14665v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14665v1</guid><description>Joy Bose et al. — arxiv:2605.14665 — RAG</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>A Picture is Worth a Thousand Words? An Empirical Study of Aggregation Strategies for Visual Financial Document Retrieval</title><link>http://arxiv.org/abs/2605.14581v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14581v1</guid><description>Ho Hung Lim et al. — arxiv:2605.14581 — RAG</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Not All RAGs Are Created Equal: A Component-Wise Empirical Study for Software Engineering Tasks</title><link>http://arxiv.org/abs/2605.14503v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14503v1</guid><description>Qiang Ke et al. — arxiv:2605.14503 — RAG</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Deepchecks: Evaluating Retrieval-Augmented Generation (RAG)</title><link>http://arxiv.org/abs/2605.14488v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14488v1</guid><description>Assaf Gerner et al. — arxiv:2605.14488 — RAG</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>When Retrieval Hurts Code Completion: A Diagnostic Study of Stale Repository Context</title><link>http://arxiv.org/abs/2605.14478v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14478v1</guid><description>Haojun Weng et al. — arxiv:2605.14478 — RAG</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>OpenDeepThink: Parallel Reasoning via Bradley--Terry Aggregation</title><link>http://arxiv.org/abs/2605.15177v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15177v1</guid><description>Shang Zhou et al. — arxiv:2605.15177 — Reasoning</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Pelican-Unified 1.0: A Unified Embodied Intelligence Model for Understanding, Reasoning, Imagination and Action</title><link>http://arxiv.org/abs/2605.15153v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15153v1</guid><description>Yi Zhang et al. — arxiv:2605.15153 — Reasoning</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Natural Synthesis: Outperforming Reactive Synthesis Tools with Large Reasoning Models</title><link>http://arxiv.org/abs/2605.15131v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15131v1</guid><description>Frederik Schmitt et al. — arxiv:2605.15131 — Reasoning</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>COTCAgent: Preventive Consultation via Probabilistic Chain-of-Thought Completion</title><link>http://arxiv.org/abs/2605.15016v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15016v1</guid><description>Zihan Deng et al. — arxiv:2605.15016 — Reasoning</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Boosting Reinforcement Learning with Verifiable Rewards via Randomly Selected Few-Shot Guidance</title><link>http://arxiv.org/abs/2605.15012v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15012v1</guid><description>Kai Yan et al. — arxiv:2605.15012 — Reasoning</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>InfoSFT: Learn More and Forget Less with Information-Aware Token Weighting</title><link>http://arxiv.org/abs/2605.14967v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14967v1</guid><description>Mahdi Sabbaghi et al. — arxiv:2605.14967 — Reasoning</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>SteerSeg: Attention Steering for Reasoning Video Segmentation</title><link>http://arxiv.org/abs/2605.14908v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14908v1</guid><description>Ali Cheraghian et al. — arxiv:2605.14908 — Reasoning</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Exploring Vision-Language Models for Online Signature Verification: A Zero-Shot Capability Study</title><link>http://arxiv.org/abs/2605.14845v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14845v1</guid><description>Marta Robledo-Moreno et al. — arxiv:2605.14845 — Reasoning</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>COAL: Counterfactual and Observation-Enhanced Alignment Learning for Discriminative Referring Multi-Object Tracking</title><link>http://arxiv.org/abs/2605.14795v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14795v1</guid><description>Shukun Jia et al. — arxiv:2605.14795 — Reasoning</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Video-Zero: Self-Evolution Video Understanding</title><link>http://arxiv.org/abs/2605.14733v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14733v1</guid><description>Ruixu Zhang et al. — arxiv:2605.14733 — Reasoning</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Hand-in-the-Loop: Improving Dexterous VLA via Seamless Interventional Correction</title><link>http://arxiv.org/abs/2605.15157v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15157v1</guid><description>Zhuohang Li et al. — arxiv:2605.15157 — Tool Use</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Understanding How International Students in the U.S. Are Using Conversational AI to Support Cross-Cultural Adaptation</title><link>http://arxiv.org/abs/2605.15127v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15127v1</guid><description>Laleh Nourian et al. — arxiv:2605.15127 — Tool Use</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>From Text to Voice: A Reproducible and Verifiable Framework for Evaluating Tool Calling LLM Agents</title><link>http://arxiv.org/abs/2605.15104v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15104v1</guid><description>Md Tahmid Rahman Laskar et al. — arxiv:2605.15104 — Tool Use</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Concurrency without Model Changes: Future-based Asynchronous Function Calling for LLMs</title><link>http://arxiv.org/abs/2605.15077v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15077v1</guid><description>Guangyu Feng et al. — arxiv:2605.15077 — Tool Use</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Case-Based Calibration of Adaptive Reasoning and Execution for LLM Tool Use</title><link>http://arxiv.org/abs/2605.15041v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15041v1</guid><description>Renning Pang et al. — arxiv:2605.15041 — Tool Use</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Orchard: An Open-Source Agentic Modeling Framework</title><link>http://arxiv.org/abs/2605.15040v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15040v1</guid><description>Baolin Peng et al. — arxiv:2605.15040 — Tool Use</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Toward Securing AI Agents Like Operating Systems</title><link>http://arxiv.org/abs/2605.14932v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14932v1</guid><description>Lukas Pirch et al. — arxiv:2605.14932 — Tool Use</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Beyond Individual Intelligence: Surveying Collaboration, Failure Attribution, and Self-Evolution in LLM-based Multi-Agent Systems</title><link>http://arxiv.org/abs/2605.14892v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14892v1</guid><description>Shihao Qi et al. — arxiv:2605.14892 — Tool Use</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>SmartWalkCoach: An AI Companion for End-to-End Walking Guidance, Motivation, and Reflection</title><link>http://arxiv.org/abs/2605.14628v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14628v1</guid><description>Xianzhe Zhang et al. — arxiv:2605.14628 — Tool Use</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Prompting Policies for Multi-step Reasoning and Tool-Use in Black-box LLMs with Iterative Distillation of Experience</title><link>http://arxiv.org/abs/2605.14443v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14443v1</guid><description>Krishna Sayana et al. — arxiv:2605.14443 — Tool Use</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Does Synthetic Layered Design Data Benefit Layered Design Decomposition?</title><link>http://arxiv.org/abs/2605.15167v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15167v1</guid><description>Kam Man Wu et al. — arxiv:2605.15167 — Multimodal LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Pelican-Unified 1.0: A Unified Embodied Intelligence Model for Understanding, Reasoning, Imagination and Action</title><link>http://arxiv.org/abs/2605.15153v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15153v1</guid><description>Yi Zhang et al. — arxiv:2605.15153 — Multimodal LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>MemEye: A Visual-Centric Evaluation Framework for Multimodal Agent Memory</title><link>http://arxiv.org/abs/2605.15128v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15128v1</guid><description>Minghao Guo et al. — arxiv:2605.15128 — Multimodal LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>On the Cultural Anachronism and Temporal Reasoning in Vision Language Models</title><link>http://arxiv.org/abs/2605.15071v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15071v1</guid><description>Mukul Ranjan et al. — arxiv:2605.15071 — Multimodal LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>LATERN: Test-Time Context-Aware Explainable Video Anomaly Detection</title><link>http://arxiv.org/abs/2605.15054v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15054v1</guid><description>Mitchell Piehl et al. — arxiv:2605.15054 — Multimodal LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Compositional Video Generation via Inference-Time Guidance</title><link>http://arxiv.org/abs/2605.14988v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14988v1</guid><description>Ariel Shaulov et al. — arxiv:2605.14988 — Multimodal LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>MHSA: A Lightweight Framework for Mitigating Hallucinations via Steered Attention in LVLMs</title><link>http://arxiv.org/abs/2605.14966v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14966v1</guid><description>Wei Ding et al. — arxiv:2605.14966 — Multimodal LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Octopus: History-Free Gradient Orthogonalization for Continual Learning in Multimodal Large Language Models</title><link>http://arxiv.org/abs/2605.14938v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14938v1</guid><description>Yuehao Liu et al. — arxiv:2605.14938 — Multimodal LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Chain-of-Procedure: Hierarchical Visual-Language Reasoning for Procedural QA</title><link>http://arxiv.org/abs/2605.14928v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14928v1</guid><description>Guanhua Chen et al. — arxiv:2605.14928 — Multimodal LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>SceneParser: Hierarchical Scene Parsing for Visual Semantics Understanding</title><link>http://arxiv.org/abs/2605.14923v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14923v1</guid><description>Pengxin Xu et al. — arxiv:2605.14923 — Multimodal LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>SANA-WM: Efficient Minute-Scale World Modeling with Hybrid Linear Diffusion Transformer</title><link>http://arxiv.org/abs/2605.15178v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15178v1</guid><description>Haoyi Zhu et al. — arxiv:2605.15178 — Long Context</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Self-Distilled Agentic Reinforcement Learning</title><link>http://arxiv.org/abs/2605.15155v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15155v1</guid><description>Zhengxi Lu et al. — arxiv:2605.15155 — Long Context</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Significant or Not? The Impact of Randomisation During Data Reduction on Confirming a New Pulsating Ultraluminous X-ray Source Candidate in Centaurus A</title><link>http://arxiv.org/abs/2605.15137v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15137v1</guid><description>Amy H. Knight et al. — arxiv:2605.15137 — Long Context</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Improving Multi-turn Dialogue Consistency with Self-Recall Thinking</title><link>http://arxiv.org/abs/2605.15102v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15102v1</guid><description>Renning Pang et al. — arxiv:2605.15102 — Long Context</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Sophie Germain, mathématicienne extraordinaire: A story stranger than fiction</title><link>http://arxiv.org/abs/2605.15046v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15046v1</guid><description>David Pengelley et al. — arxiv:2605.15046 — Long Context</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>EverAnimate: Minute-Scale Human Animation via Latent Flow Restoration</title><link>http://arxiv.org/abs/2605.15042v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15042v1</guid><description>Wuyang Li et al. — arxiv:2605.15042 — Long Context</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>SemaTune: Semantic-Aware Online OS Tuning with Large Language Models</title><link>http://arxiv.org/abs/2605.15026v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15026v1</guid><description>Georgios Liargkovas et al. — arxiv:2605.15026 — Long Context</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Imaging without visibilities: FAST-Effelsberg scintillometry of PSR B1508+55</title><link>http://arxiv.org/abs/2605.15004v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15004v1</guid><description>Tim Sprenger et al. — arxiv:2605.15004 — Long Context</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>MemLens: Benchmarking Multimodal Long-Term Memory in Large Vision-Language Models</title><link>http://arxiv.org/abs/2605.14906v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14906v1</guid><description>Xiyu Ren et al. — arxiv:2605.14906 — Long Context</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>SurgicalMamba: Dual-Path SSD with State Regramming for Online Surgical Phase Recognition</title><link>http://arxiv.org/abs/2605.14889v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14889v1</guid><description>Sukju Oh et al. — arxiv:2605.14889 — Long Context</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Warp-as-History: Generalizable Camera-Controlled Video Generation from One Training Video</title><link>http://arxiv.org/abs/2605.15182v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15182v1</guid><description>Yifan Wang et al. — arxiv:2605.15182 — LLM Efficiency</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>SANA-WM: Efficient Minute-Scale World Modeling with Hybrid Linear Diffusion Transformer</title><link>http://arxiv.org/abs/2605.15178v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15178v1</guid><description>Haoyi Zhu et al. — arxiv:2605.15178 — LLM Efficiency</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Widening the Gap: Exploiting LLM Quantization via Outlier Injection</title><link>http://arxiv.org/abs/2605.15152v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15152v1</guid><description>Xiaohua Zhan et al. — arxiv:2605.15152 — LLM Efficiency</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Extensive long-range magic in non-Abelian topological orders</title><link>http://arxiv.org/abs/2605.15150v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15150v1</guid><description>Yuzhen Zhang et al. — arxiv:2605.15150 — LLM Efficiency</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Forgetting That Sticks: Quantization-Permanent Unlearning via Circuit Attribution</title><link>http://arxiv.org/abs/2605.15138v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15138v1</guid><description>Saisab Sadhu et al. — arxiv:2605.15138 — LLM Efficiency</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>EverAnimate: Minute-Scale Human Animation via Latent Flow Restoration</title><link>http://arxiv.org/abs/2605.15042v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15042v1</guid><description>Wuyang Li et al. — arxiv:2605.15042 — LLM Efficiency</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Impurity-induced geometric correlations and fractional quantization in quantum Hall systems</title><link>http://arxiv.org/abs/2605.15022v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15022v1</guid><description>M. A. Hidalgo et al. — arxiv:2605.15022 — LLM Efficiency</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>ACE-LoRA: Adaptive Orthogonal Decoupling for Continual Image Editing</title><link>http://arxiv.org/abs/2605.14948v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14948v1</guid><description>Yuehao Liu et al. — arxiv:2605.14948 — LLM Efficiency</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Not All Symbols Are Equal: Importance-Aware Constellation Design for Semantic Communication</title><link>http://arxiv.org/abs/2605.14940v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14940v1</guid><description>Albert Shaju et al. — arxiv:2605.14940 — LLM Efficiency</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>A Hardware-Aware, Per-Layer Methodology for Post-Training Quantization of Large Language Models</title><link>http://arxiv.org/abs/2605.14929v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14929v1</guid><description>Earl Killian et al. — arxiv:2605.14929 — LLM Efficiency</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>From Sycophantic Consensus to Pluralistic Repair: Why AI Alignment Must Surface Disagreement</title><link>http://arxiv.org/abs/2605.14912v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14912v1</guid><description>Varad Vishwarupe et al. — arxiv:2605.14912 — Alignment</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Hierarchical Image Tokenization for Multi-Scale Image Super Resolution</title><link>http://arxiv.org/abs/2605.14891v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14891v1</guid><description>Isma Hadji et al. — arxiv:2605.14891 — Alignment</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>When Are Two Networks the Same? Tensor Similarity for Mechanistic Interpretability</title><link>http://arxiv.org/abs/2605.15183v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15183v1</guid><description>ML Nissen Gonzalez et al. — arxiv:2605.15183 — Hallucination</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Text Knows What, Tables Know When: Clinical Timeline Reconstruction via Retrieval-Augmented Multimodal Alignment</title><link>http://arxiv.org/abs/2605.15168v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15168v1</guid><description>Sayantan Kumar et al. — arxiv:2605.15168 — Hallucination</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Why Neighborhoods Matter: Traversal Context and Provenance in Agentic GraphRAG</title><link>http://arxiv.org/abs/2605.15109v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15109v1</guid><description>Riccardo Terrenzi et al. — arxiv:2605.15109 — Hallucination</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Dual-Dimensional Consistency: Balancing Budget and Quality in Adaptive Inference-Time Scaling</title><link>http://arxiv.org/abs/2605.15100v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15100v1</guid><description>Rongman Xu et al. — arxiv:2605.15100 — Hallucination</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Case-Based Calibration of Adaptive Reasoning and Execution for LLM Tool Use</title><link>http://arxiv.org/abs/2605.15041v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15041v1</guid><description>Renning Pang et al. — arxiv:2605.15041 — Hallucination</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>COTCAgent: Preventive Consultation via Probabilistic Chain-of-Thought Completion</title><link>http://arxiv.org/abs/2605.15016v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15016v1</guid><description>Zihan Deng et al. — arxiv:2605.15016 — Hallucination</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Kadison&apos;s problem for trace-vector orthonormal bases in $\mathrm{II}_1$ factors with separable predual</title><link>http://arxiv.org/abs/2605.15006v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15006v1</guid><description>Yixin He et al. — arxiv:2605.15006 — Hallucination</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Compositional Video Generation via Inference-Time Guidance</title><link>http://arxiv.org/abs/2605.14988v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14988v1</guid><description>Ariel Shaulov et al. — arxiv:2605.14988 — Hallucination</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>MHSA: A Lightweight Framework for Mitigating Hallucinations via Steered Attention in LVLMs</title><link>http://arxiv.org/abs/2605.14966v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14966v1</guid><description>Wei Ding et al. — arxiv:2605.14966 — Hallucination</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Unlocking Complex Visual Generation via Closed-Loop Verified Reasoning</title><link>http://arxiv.org/abs/2605.14876v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14876v1</guid><description>Hanbo Cheng et al. — arxiv:2605.14876 — Hallucination</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Position: Behavioural Assurance Cannot Verify the Safety Claims Governance Now Demands</title><link>http://arxiv.org/abs/2605.15164v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15164v1</guid><description>Pratinav Seth et al. — arxiv:2605.15164 — LLM Safety</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>WARD: Adversarially Robust Defense of Web Agents Against Prompt Injections</title><link>http://arxiv.org/abs/2605.15030v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15030v1</guid><description>Tri Cao et al. — arxiv:2605.15030 — LLM Safety</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Fast Adversarial Attacks with Gradient Prediction</title><link>http://arxiv.org/abs/2605.14868v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14868v1</guid><description>Kamil Ciosek et al. — arxiv:2605.14868 — LLM Safety</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>EVA: Editing for Versatile Alignment against Jailbreaks</title><link>http://arxiv.org/abs/2605.14750v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14750v1</guid><description>Yi Wang et al. — arxiv:2605.14750 — LLM Safety</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>The Great Pretender: A Stochasticity Problem in LLM Jailbreak</title><link>http://arxiv.org/abs/2605.14418v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14418v1</guid><description>Jean-Philippe Monteuuis et al. — arxiv:2605.14418 — LLM Safety</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Guided Diffusion Sampling for Precipitation Forecast Interventions</title><link>http://arxiv.org/abs/2605.14317v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14317v1</guid><description>Ayumu Ueyama et al. — arxiv:2605.14317 — LLM Safety</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>OpenDeepThink: Parallel Reasoning via Bradley--Terry Aggregation</title><link>http://arxiv.org/abs/2605.15177v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15177v1</guid><description>Shang Zhou et al. — arxiv:2605.15177 — LLM Evaluation</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>From Text to Voice: A Reproducible and Verifiable Framework for Evaluating Tool Calling LLM Agents</title><link>http://arxiv.org/abs/2605.15104v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15104v1</guid><description>Md Tahmid Rahman Laskar et al. — arxiv:2605.15104 — LLM Evaluation</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Small, Private Language Models as Teammates for Educational Assessment Design</title><link>http://arxiv.org/abs/2605.15015v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15015v1</guid><description>Chris Davis Jaldi et al. — arxiv:2605.15015 — LLM Evaluation</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Graphs of Research: Citation Evolution Graphs as Supervision for Research Idea Generation</title><link>http://arxiv.org/abs/2605.14790v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14790v1</guid><description>Songyang Gao et al. — arxiv:2605.14790 — LLM Evaluation</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Teaching Large Language Models When Not to Know: Learning Temporal Critique for Ex-Ante Reasoning</title><link>http://arxiv.org/abs/2605.14636v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14636v1</guid><description>Chenlu Ding et al. — arxiv:2605.14636 — LLM Evaluation</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>MultiEmo-Bench: Multi-label Visual Emotion Analysis for Multi-modal Large Language Models</title><link>http://arxiv.org/abs/2605.14635v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14635v1</guid><description>Tianwei Chen et al. — arxiv:2605.14635 — LLM Evaluation</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Sycophancy is an Educational Safety Risk: Why LLM Tutors Need Sycophancy Benchmarks</title><link>http://arxiv.org/abs/2605.14604v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14604v1</guid><description>Enkelejda Kasneci et al. — arxiv:2605.14604 — LLM Evaluation</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Mining Subscenario Refactoring Opportunities in Behaviour-Driven Software Test Suites: ML Classifiers and LLM-Judge Baselines</title><link>http://arxiv.org/abs/2605.14568v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14568v1</guid><description>Ali Hassaan Mughal et al. — arxiv:2605.14568 — LLM Evaluation</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>The Great Pretender: A Stochasticity Problem in LLM Jailbreak</title><link>http://arxiv.org/abs/2605.14418v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14418v1</guid><description>Jean-Philippe Monteuuis et al. — arxiv:2605.14418 — LLM Evaluation</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Latency-Quality Routing for Functionally Equivalent Tools in LLM Agents</title><link>http://arxiv.org/abs/2605.14241v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14241v1</guid><description>Kexin Chu et al. — arxiv:2605.14241 — LLM Evaluation</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Learning from Language Feedback via Variational Policy Distillation</title><link>http://arxiv.org/abs/2605.15113v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15113v1</guid><description>Yang Li et al. — arxiv:2605.15113 — Code LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Adapting AlphaEvolve to Optimize Fully Homomorphic Encryption on TPUs</title><link>http://arxiv.org/abs/2605.14718v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14718v1</guid><description>Shruthi Gorantala et al. — arxiv:2605.14718 — Code LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Learning from Failures: Correction-Oriented Policy Optimization with Verifiable Rewards</title><link>http://arxiv.org/abs/2605.14539v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14539v1</guid><description>Mengjie Ren et al. — arxiv:2605.14539 — Code LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Not All RAGs Are Created Equal: A Component-Wise Empirical Study for Software Engineering Tasks</title><link>http://arxiv.org/abs/2605.14503v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14503v1</guid><description>Qiang Ke et al. — arxiv:2605.14503 — Code LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>When Retrieval Hurts Code Completion: A Diagnostic Study of Stale Repository Context</title><link>http://arxiv.org/abs/2605.14478v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14478v1</guid><description>Haojun Weng et al. — arxiv:2605.14478 — Code LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Test-Time Learning with an Evolving Library</title><link>http://arxiv.org/abs/2605.14477v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14477v1</guid><description>Weijia Xu et al. — arxiv:2605.14477 — Code LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>FuzzAgent: Multi-Agent System for Evolutionary Library Fuzzing</title><link>http://arxiv.org/abs/2605.14431v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14431v1</guid><description>Yunlong Lyu et al. — arxiv:2605.14431 — Code LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Coding Agent Is Good As World Simulator</title><link>http://arxiv.org/abs/2605.14398v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14398v1</guid><description>Hongyu Wang et al. — arxiv:2605.14398 — Code LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>GenCircuit-RL: Reinforcement Learning from Hierarchical Verification for Genetic Circuit Design</title><link>http://arxiv.org/abs/2605.14215v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14215v1</guid><description>Noah Flynn et al. — arxiv:2605.14215 — Code LLM</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Position: Behavioural Assurance Cannot Verify the Safety Claims Governance Now Demands</title><link>http://arxiv.org/abs/2605.15164v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15164v1</guid><description>Pratinav Seth et al. — arxiv:2605.15164 — Legal NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Legal NLP</category></item><item><title>Tokenizer Fertility and Zero-Shot Performance of Foundation Models on Ukrainian Legal Text: A Comparative Study</title><link>http://arxiv.org/abs/2605.14890v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14890v1</guid><description>Volodymyr Ovcharov et al. — arxiv:2605.14890 — Legal NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Legal NLP</category></item><item><title>Quantifying and Mitigating Premature Closure in Frontier LLMs</title><link>http://arxiv.org/abs/2605.15000v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15000v1</guid><description>Rebecca Handler et al. — arxiv:2605.15000 — Medical NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Medical NLP</category></item><item><title>ML-Embed: Inclusive and Efficient Embeddings for a Multilingual World</title><link>http://arxiv.org/abs/2605.15081v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15081v1</guid><description>Ziyin Zhang et al. — arxiv:2605.15081 — Multilingual NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>StyleTextGen: Style-Conditioned Multilingual Scene Text Generation</title><link>http://arxiv.org/abs/2605.14708v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14708v1</guid><description>Zeyu Chen et al. — arxiv:2605.14708 — Multilingual NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Reinforcement Learning with Semantic Rewards Enables Low-Resource Language Expansion without Alignment Tax</title><link>http://arxiv.org/abs/2605.14366v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14366v1</guid><description>Zeli Su et al. — arxiv:2605.14366 — Multilingual NLP</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>A Climate-Constrained Bayesian Inverse Method for JWST Rocky Exoplanet Eclipse Spectra: A Case Study of LTT 1445A b</title><link>http://arxiv.org/abs/2605.14997v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14997v1</guid><description>Nicholas Wogan et al. — arxiv:2605.14997 — Information Extraction</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>MemLens: Benchmarking Multimodal Long-Term Memory in Large Vision-Language Models</title><link>http://arxiv.org/abs/2605.14906v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14906v1</guid><description>Xiyu Ren et al. — arxiv:2605.14906 — Information Extraction</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>DT-Transformer: A Foundation Model for Disease Trajectory Prediction on a Real-world Health System</title><link>http://arxiv.org/abs/2605.14227v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14227v1</guid><description>Yunying Zhu et al. — arxiv:2605.14227 — Text Classification</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>Self-Distilled Agentic Reinforcement Learning</title><link>http://arxiv.org/abs/2605.15155v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15155v1</guid><description>Zhengxi Lu et al. — arxiv:2605.15155 — Question Answering</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Quantifying and Mitigating Premature Closure in Frontier LLMs</title><link>http://arxiv.org/abs/2605.15000v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15000v1</guid><description>Rebecca Handler et al. — arxiv:2605.15000 — Question Answering</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Chain-of-Procedure: Hierarchical Visual-Language Reasoning for Procedural QA</title><link>http://arxiv.org/abs/2605.14928v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14928v1</guid><description>Guanhua Chen et al. — arxiv:2605.14928 — Question Answering</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>COREKG: Coreset-Guided Personalized Summarization of Knowledge Graphs</title><link>http://arxiv.org/abs/2605.14900v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14900v1</guid><description>Sohel Aman Khan et al. — arxiv:2605.14900 — Question Answering</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>A Heterogeneous Temporal Memory Governance Framework for Long-Term LLM Persona Consistency</title><link>http://arxiv.org/abs/2605.14802v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14802v1</guid><description>Zhao Yang et al. — arxiv:2605.14802 — Question Answering</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Video-Zero: Self-Evolution Video Understanding</title><link>http://arxiv.org/abs/2605.14733v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14733v1</guid><description>Ruixu Zhang et al. — arxiv:2605.14733 — Question Answering</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>From Table to Cell: Attention for Better Reasoning with TABALIGN</title><link>http://arxiv.org/abs/2605.14465v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14465v1</guid><description>Tung Sum Thomas Kwok et al. — arxiv:2605.14465 — Question Answering</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>When Answers Stray from Questions: Hallucination Detection via Question-Answer Orthogonal Decomposition</title><link>http://arxiv.org/abs/2605.14449v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14449v1</guid><description>Siyang Yao et al. — arxiv:2605.14449 — Question Answering</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Uncovering the Representation Geometry of Minimal Cores in Overcomplete Reasoning Traces</title><link>http://arxiv.org/abs/2605.14358v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14358v1</guid><description>Sanjoy Chowdhury et al. — arxiv:2605.14358 — Question Answering</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Herculean: An Agentic Benchmark for Financial Intelligence</title><link>http://arxiv.org/abs/2605.14355v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14355v1</guid><description>Xueqing Peng et al. — arxiv:2605.14355 — Question Answering</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Precise Verification of Transformers through ReLU-Catalyzed Abstraction Refinement</title><link>http://arxiv.org/abs/2605.14294v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14294v1</guid><description>Hengjie Liu et al. — arxiv:2605.14294 — Sentiment Analysis</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Why Neighborhoods Matter: Traversal Context and Provenance in Agentic GraphRAG</title><link>http://arxiv.org/abs/2605.15109v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.15109v1</guid><description>Riccardo Terrenzi et al. — arxiv:2605.15109 — Knowledge Graph</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>KGPFN: Unlocking the Potential of Knowledge Graph Foundation Model via In-Context Learning</title><link>http://arxiv.org/abs/2605.14907v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14907v1</guid><description>Yisen Gao et al. — arxiv:2605.14907 — Knowledge Graph</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>COREKG: Coreset-Guided Personalized Summarization of Knowledge Graphs</title><link>http://arxiv.org/abs/2605.14900v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14900v1</guid><description>Sohel Aman Khan et al. — arxiv:2605.14900 — Knowledge Graph</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Falkor-IRAC: Graph-Constrained Generation for Verified Legal Reasoning in Indian Judicial AI</title><link>http://arxiv.org/abs/2605.14665v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14665v1</guid><description>Joy Bose et al. — arxiv:2605.14665 — Knowledge Graph</description><pubDate>Thu, 14 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Children&apos;s English Reading Story Generation via Supervised Fine-Tuning of Compact LLMs with Controllable Difficulty and Safety</title><link>http://arxiv.org/abs/2605.13709v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13709v1</guid><description>Qian Shen et al. — arxiv:2605.13709 — NLP</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Pretraining Language Models with Subword Regularization: An Empirical Study of BPE Dropout in Low-Resource NLP</title><link>http://arxiv.org/abs/2605.13436v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13436v1</guid><description>Ruan Visser et al. — arxiv:2605.13436 — NLP</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>LLMs as annotators of credibility assessment in Danish asylum decisions: evaluating classification performance and errors beyond aggregated metrics</title><link>http://arxiv.org/abs/2605.13412v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13412v1</guid><description>Galadrielle Humblot-Renaux et al. — arxiv:2605.13412 — NLP</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Retrieval-Augmented Tutoring for Algorithm Tracing and Problem-Solving in AI Education</title><link>http://arxiv.org/abs/2605.12988v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12988v1</guid><description>Mragisha Jain et al. — arxiv:2605.12988 — NLP</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Rethinking Layer Relevance in Large Language Models Beyond Cosine Similarity</title><link>http://arxiv.org/abs/2605.14075v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14075v1</guid><description>Cristian Hinostroza et al. — arxiv:2605.14075 — NLP</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>WARDEN: Endangered Indigenous Language Transcription and Translation with 6 Hours of Training Data</title><link>http://arxiv.org/abs/2605.13846v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13846v1</guid><description>Ziheng Zhang et al. — arxiv:2605.13846 — LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Good Agentic Friends Do Not Just Give Verbal Advice: They Can Update Your Weights</title><link>http://arxiv.org/abs/2605.13839v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13839v1</guid><description>Wenrui Bao et al. — arxiv:2605.13839 — LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Negation Neglect: When models fail to learn negations in training</title><link>http://arxiv.org/abs/2605.13829v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13829v1</guid><description>Harry Mayne et al. — arxiv:2605.13829 — LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>History Anchors: How Prior Behavior Steers LLM Decisions Toward Unsafe Actions</title><link>http://arxiv.org/abs/2605.13825v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13825v1</guid><description>Alberto G. Rodríguez Salgado et al. — arxiv:2605.13825 — LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Neurosymbolic Auditing of Natural-Language Software Requirements</title><link>http://arxiv.org/abs/2605.13817v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13817v1</guid><description>Bethel Hall et al. — arxiv:2605.13817 — LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Improving Reproducibility in Evaluation through Multi-Level Annotator Modeling</title><link>http://arxiv.org/abs/2605.13801v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13801v1</guid><description>Deepak Pandita et al. — arxiv:2605.13801 — LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>An LLM-Based System for Argument Reconstruction</title><link>http://arxiv.org/abs/2605.13793v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13793v1</guid><description>Paulo Pirozelli et al. — arxiv:2605.13793 — LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Attention Once Is All You Need: Efficient Streaming Inference with Stateful Transformers</title><link>http://arxiv.org/abs/2605.13784v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13784v1</guid><description>Victor Norgren et al. — arxiv:2605.13784 — LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>MinT: Managed Infrastructure for Training and Serving Millions of LLMs</title><link>http://arxiv.org/abs/2605.13779v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13779v1</guid><description>Mind Lab et al. — arxiv:2605.13779 — LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>&quot;Like Taking the Path of Least Resistance&quot;: Exploring the Impact of LLM Interaction on the Creative Process of Programming</title><link>http://arxiv.org/abs/2605.13776v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13776v1</guid><description>Zeinabsadat Saghi et al. — arxiv:2605.13776 — LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>EVA-Bench: A New End-to-end Framework for Evaluating Voice Agents</title><link>http://arxiv.org/abs/2605.13841v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13841v1</guid><description>Tara Bogavelli et al. — arxiv:2605.13841 — LLM Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Good Agentic Friends Do Not Just Give Verbal Advice: They Can Update Your Weights</title><link>http://arxiv.org/abs/2605.13839v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13839v1</guid><description>Wenrui Bao et al. — arxiv:2605.13839 — LLM Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Training Long-Context Vision-Language Models Effectively with Generalization Beyond 128K Context</title><link>http://arxiv.org/abs/2605.13831v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13831v1</guid><description>Zhaowei Wang et al. — arxiv:2605.13831 — LLM Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>History Anchors: How Prior Behavior Steers LLM Decisions Toward Unsafe Actions</title><link>http://arxiv.org/abs/2605.13825v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13825v1</guid><description>Alberto G. Rodríguez Salgado et al. — arxiv:2605.13825 — LLM Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Harnessing Agentic Evolution</title><link>http://arxiv.org/abs/2605.13821v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13821v1</guid><description>Jiayi Zhang et al. — arxiv:2605.13821 — LLM Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>EvoGround: Self-Evolving Video Agents for Video Temporal Grounding</title><link>http://arxiv.org/abs/2605.13803v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13803v1</guid><description>Minjoon Jung et al. — arxiv:2605.13803 — LLM Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>EconAI: Dynamic Persona Evolution and Memory-Aware Agents in Evolving Economic Environments</title><link>http://arxiv.org/abs/2605.13762v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13762v1</guid><description>Annie Liu et al. — arxiv:2605.13762 — LLM Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>The Co-evolution of Costly Signaling and Cooperation in Social Dilemmas</title><link>http://arxiv.org/abs/2605.13750v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13750v1</guid><description>Mahdi Abolhasani et al. — arxiv:2605.13750 — LLM Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Learning POMDP World Models from Observations with Language-Model Priors</title><link>http://arxiv.org/abs/2605.13740v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13740v1</guid><description>Valentin Six et al. — arxiv:2605.13740 — LLM Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Senses Wide Shut: A Representation-Action Gap in Omnimodal LLMs</title><link>http://arxiv.org/abs/2605.13737v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13737v1</guid><description>Trung Nguyen Quang et al. — arxiv:2605.13737 — LLM Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>EVA-Bench: A New End-to-end Framework for Evaluating Voice Agents</title><link>http://arxiv.org/abs/2605.13841v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13841v1</guid><description>Tara Bogavelli et al. — arxiv:2605.13841 — Multi-Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Good Agentic Friends Do Not Just Give Verbal Advice: They Can Update Your Weights</title><link>http://arxiv.org/abs/2605.13839v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13839v1</guid><description>Wenrui Bao et al. — arxiv:2605.13839 — Multi-Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Training Long-Context Vision-Language Models Effectively with Generalization Beyond 128K Context</title><link>http://arxiv.org/abs/2605.13831v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13831v1</guid><description>Zhaowei Wang et al. — arxiv:2605.13831 — Multi-Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>ScioMind: Cognitively Grounded Multi-Agent Social Simulation with Anchoring-Based Belief Dynamics and Dynamic Profiles</title><link>http://arxiv.org/abs/2605.13725v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13725v1</guid><description>Yitian Yang et al. — arxiv:2605.13725 — Multi-Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>SkillOps: Managing LLM Agent Skill Libraries as Self-Maintaining Software Ecosystems</title><link>http://arxiv.org/abs/2605.13716v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13716v1</guid><description>Hongji Pu et al. — arxiv:2605.13716 — Multi-Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Learning Equilibria in Coordination Games via Minorization-Maximization</title><link>http://arxiv.org/abs/2605.13644v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13644v1</guid><description>Ashok Krishnan K. S. et al. — arxiv:2605.13644 — Multi-Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>OpenAaaS: An Open Agent-as-a-Service Framework for Distributed Materials-Informatics Research</title><link>http://arxiv.org/abs/2605.13618v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13618v1</guid><description>Peng Kang et al. — arxiv:2605.13618 — Multi-Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Self-Supervised On-Policy Reinforcement Learning via Contrastive Proximal Policy Optimisation</title><link>http://arxiv.org/abs/2605.13554v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13554v1</guid><description>Asim Osman et al. — arxiv:2605.13554 — Multi-Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Scaling Retrieval-Augmented Reasoning with Parallel Search and Explicit Merging</title><link>http://arxiv.org/abs/2605.13534v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13534v1</guid><description>Jiabei Liu et al. — arxiv:2605.13534 — Multi-Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>MMSkills: Towards Multimodal Skills for General Visual Agents</title><link>http://arxiv.org/abs/2605.13527v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13527v1</guid><description>Kangning Zhang et al. — arxiv:2605.13527 — Multi-Agent</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>VectorSmuggle: Steganographic Exfiltration in Embedding Stores and a Cryptographic Provenance Defense</title><link>http://arxiv.org/abs/2605.13764v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13764v1</guid><description>Jascha Wanger et al. — arxiv:2605.13764 — RAG</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>OpenAaaS: An Open Agent-as-a-Service Framework for Distributed Materials-Informatics Research</title><link>http://arxiv.org/abs/2605.13618v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13618v1</guid><description>Peng Kang et al. — arxiv:2605.13618 — RAG</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>PersonalAI 2.0: Enhancing knowledge graph traversal/retrieval with planning mechanism for Personalized LLM Agents</title><link>http://arxiv.org/abs/2605.13481v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13481v1</guid><description>Mikhail Menschikov et al. — arxiv:2605.13481 — RAG</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>RS-Claw: Progressive Active Tool Exploration via Hierarchical Skill Trees for Remote Sensing Agents</title><link>http://arxiv.org/abs/2605.13391v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13391v1</guid><description>Liangtian Liu et al. — arxiv:2605.13391 — RAG</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>CANTANTE: Optimizing Agentic Systems via Contrastive Credit Attribution</title><link>http://arxiv.org/abs/2605.13295v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13295v1</guid><description>Tom Zehle et al. — arxiv:2605.13295 — RAG</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Utility-Oriented Visual Evidence Selection for Multimodal Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.13277v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13277v1</guid><description>Weiqing Luo et al. — arxiv:2605.13277 — RAG</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>An Agentic AI Framework with Large Language Models and Chain-of-Thought for UAV-Assisted Logistics Scheduling with Mobile Edge Computing</title><link>http://arxiv.org/abs/2605.13221v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13221v1</guid><description>Hanwen Zhang et al. — arxiv:2605.13221 — RAG</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Pyramid Forcing: Head-Aware Pyramid KV Cache Policy for High-Quality Long Video Generation</title><link>http://arxiv.org/abs/2605.13111v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13111v1</guid><description>Jiayu Chen et al. — arxiv:2605.13111 — RAG</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>RAG-Enhanced Large Language Models for Dynamic Content Expiration Prediction in Web Search</title><link>http://arxiv.org/abs/2605.13052v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13052v1</guid><description>Tingyu Chen et al. — arxiv:2605.13052 — RAG</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Retrieval-Augmented Tutoring for Algorithm Tracing and Problem-Solving in AI Education</title><link>http://arxiv.org/abs/2605.12988v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12988v1</guid><description>Mragisha Jain et al. — arxiv:2605.12988 — RAG</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>A Hierarchical Language Model with Predictable Scaling Laws and Provable Benefits of Reasoning</title><link>http://arxiv.org/abs/2605.13687v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13687v1</guid><description>Jason Gaitonde et al. — arxiv:2605.13687 — Reasoning</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Guide, Think, Act: Interactive Embodied Reasoning in Vision-Language-Action Models</title><link>http://arxiv.org/abs/2605.13632v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13632v1</guid><description>Yiran Ling et al. — arxiv:2605.13632 — Reasoning</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Many-Shot CoT-ICL: Making In-Context Learning Truly Learn</title><link>http://arxiv.org/abs/2605.13511v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13511v1</guid><description>Tsz Ting Chung et al. — arxiv:2605.13511 — Reasoning</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Inducing Overthink: Hierarchical Genetic Algorithm-based DoS Attack on Black-Box Large Language Reasoning Models</title><link>http://arxiv.org/abs/2605.13338v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13338v1</guid><description>Shuqiang Wang et al. — arxiv:2605.13338 — Reasoning</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Achieving Gold-Medal-Level Olympiad Reasoning via Simple and Unified Scaling</title><link>http://arxiv.org/abs/2605.13301v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13301v1</guid><description>Yafu Li et al. — arxiv:2605.13301 — Reasoning</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>What properties of reasoning supervision are associated with improved downstream model quality?</title><link>http://arxiv.org/abs/2605.13290v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13290v1</guid><description>Mikołaj Langner et al. — arxiv:2605.13290 — Reasoning</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Respecting Self-Uncertainty in On-Policy Self-Distillation for Efficient LLM Reasoning</title><link>http://arxiv.org/abs/2605.13255v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13255v1</guid><description>Junlong Ke et al. — arxiv:2605.13255 — Reasoning</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>A Hybrid Framework for Natural Language Querying of IFC Models with Relational and Graph Representations</title><link>http://arxiv.org/abs/2605.13236v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13236v1</guid><description>Rabindra Lamsal et al. — arxiv:2605.13236 — Reasoning</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>An Agentic AI Framework with Large Language Models and Chain-of-Thought for UAV-Assisted Logistics Scheduling with Mobile Edge Computing</title><link>http://arxiv.org/abs/2605.13221v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13221v1</guid><description>Hanwen Zhang et al. — arxiv:2605.13221 — Reasoning</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>STOP: Structured On-Policy Pruning of Long-Form Reasoning in Low-Data Regimes</title><link>http://arxiv.org/abs/2605.13165v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13165v1</guid><description>Chenjun Xu et al. — arxiv:2605.13165 — Reasoning</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Training Long-Context Vision-Language Models Effectively with Generalization Beyond 128K Context</title><link>http://arxiv.org/abs/2605.13831v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13831v1</guid><description>Zhaowei Wang et al. — arxiv:2605.13831 — Tool Use</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Porting the Nonlinear Optimization Library HiOp to Accelerator-Based Hardware Architectures</title><link>http://arxiv.org/abs/2605.13736v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13736v1</guid><description>Slaven Peles et al. — arxiv:2605.13736 — Tool Use</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>ReTool-Video: Recursive Tool-Using Video Agents with Meta-Augmented Tool Grounding</title><link>http://arxiv.org/abs/2605.13228v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13228v1</guid><description>Xiao Liu et al. — arxiv:2605.13228 — Tool Use</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>When Does Hierarchy Help? Benchmarking Agent Coordination in Event-Driven Industrial Scheduling</title><link>http://arxiv.org/abs/2605.13172v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13172v1</guid><description>Ziqi Wang et al. — arxiv:2605.13172 — Tool Use</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Unlocking Patch-Level Features for CLIP-Based Class-Incremental Learning</title><link>http://arxiv.org/abs/2605.13835v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13835v1</guid><description>Hao Sun et al. — arxiv:2605.13835 — Multimodal LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Training Long-Context Vision-Language Models Effectively with Generalization Beyond 128K Context</title><link>http://arxiv.org/abs/2605.13831v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13831v1</guid><description>Zhaowei Wang et al. — arxiv:2605.13831 — Multimodal LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>RoboEvolve: Co-Evolving Planner-Simulator for Robotic Manipulation with Limited Data</title><link>http://arxiv.org/abs/2605.13775v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13775v1</guid><description>Harold Haodong Chen et al. — arxiv:2605.13775 — Multimodal LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>SceneGraphVLM: Dynamic Scene Graph Generation from Video with Vision-Language Models</title><link>http://arxiv.org/abs/2605.13667v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13667v1</guid><description>Vladislav Makarov et al. — arxiv:2605.13667 — Multimodal LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>ProjGuard: Safety Monitoring for Computer-Use Agents via Low-Dimensional Projections</title><link>http://arxiv.org/abs/2605.13631v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13631v1</guid><description>Kebin Contreras et al. — arxiv:2605.13631 — Multimodal LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Towards Unified Surgical Scene Understanding:Bridging Reasoning and Grounding via MLLMs</title><link>http://arxiv.org/abs/2605.13530v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13530v1</guid><description>Jincai Huang et al. — arxiv:2605.13530 — Multimodal LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>RotVLA: Rotational Latent Action for Vision-Language-Action Model</title><link>http://arxiv.org/abs/2605.13403v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13403v1</guid><description>Qiwei Li et al. — arxiv:2605.13403 — Multimodal LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>RS-Claw: Progressive Active Tool Exploration via Hierarchical Skill Trees for Remote Sensing Agents</title><link>http://arxiv.org/abs/2605.13391v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13391v1</guid><description>Liangtian Liu et al. — arxiv:2605.13391 — Multimodal LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>GRIP-VLM: Group-Relative Importance Pruning for Efficient Vision-Language Models</title><link>http://arxiv.org/abs/2605.13375v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13375v1</guid><description>Mingzhe Huang et al. — arxiv:2605.13375 — Multimodal LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>GeoFlowVLM: Geometry-Aware Joint Uncertainty for Frozen Vision-Language Embedding</title><link>http://arxiv.org/abs/2605.13352v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13352v1</guid><description>Mayank Nautiyal et al. — arxiv:2605.13352 — Multimodal LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>QLAM: A Quantum Long-Attention Memory Approach to Long-Sequence Token Modeling</title><link>http://arxiv.org/abs/2605.13833v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13833v1</guid><description>Hoang-Quan Nguyen et al. — arxiv:2605.13833 — Long Context</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Training Long-Context Vision-Language Models Effectively with Generalization Beyond 128K Context</title><link>http://arxiv.org/abs/2605.13831v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13831v1</guid><description>Zhaowei Wang et al. — arxiv:2605.13831 — Long Context</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Harnessing Agentic Evolution</title><link>http://arxiv.org/abs/2605.13821v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13821v1</guid><description>Jiayi Zhang et al. — arxiv:2605.13821 — Long Context</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>SceneGraphVLM: Dynamic Scene Graph Generation from Video with Vision-Language Models</title><link>http://arxiv.org/abs/2605.13667v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13667v1</guid><description>Vladislav Makarov et al. — arxiv:2605.13667 — Long Context</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>RealICU: Do LLM Agents Understand Long-Context ICU Data? A Benchmark Beyond Behavior Imitation</title><link>http://arxiv.org/abs/2605.13542v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13542v1</guid><description>Chengzhi Shen et al. — arxiv:2605.13542 — Long Context</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Granite Embedding Multilingual R2 Models</title><link>http://arxiv.org/abs/2605.13521v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13521v1</guid><description>Parul Awasthy et al. — arxiv:2605.13521 — Long Context</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Many-Shot CoT-ICL: Making In-Context Learning Truly Learn</title><link>http://arxiv.org/abs/2605.13511v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13511v1</guid><description>Tsz Ting Chung et al. — arxiv:2605.13511 — Long Context</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>LongBEL: Long-Context and Document-Consistent Biomedical Entity Linking</title><link>http://arxiv.org/abs/2605.13451v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13451v1</guid><description>Adam Remaki et al. — arxiv:2605.13451 — Long Context</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>RS-Claw: Progressive Active Tool Exploration via Hierarchical Skill Trees for Remote Sensing Agents</title><link>http://arxiv.org/abs/2605.13391v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13391v1</guid><description>Liangtian Liu et al. — arxiv:2605.13391 — Long Context</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Phasor Memory Networks: Stable Backpropagation Through Time for Scalable Explicit Memory</title><link>http://arxiv.org/abs/2605.13370v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13370v1</guid><description>Sungwoo Goo et al. — arxiv:2605.13370 — Long Context</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Good Agentic Friends Do Not Just Give Verbal Advice: They Can Update Your Weights</title><link>http://arxiv.org/abs/2605.13839v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13839v1</guid><description>Wenrui Bao et al. — arxiv:2605.13839 — LLM Efficiency</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Provable Quantization with Randomized Hadamard Transform</title><link>http://arxiv.org/abs/2605.13810v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13810v1</guid><description>Ying Feng et al. — arxiv:2605.13810 — LLM Efficiency</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>MinT: Managed Infrastructure for Training and Serving Millions of LLMs</title><link>http://arxiv.org/abs/2605.13779v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13779v1</guid><description>Mind Lab et al. — arxiv:2605.13779 — LLM Efficiency</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>High-Rate Quantized Matrix Multiplication II</title><link>http://arxiv.org/abs/2605.13768v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13768v1</guid><description>Or Ordentlich et al. — arxiv:2605.13768 — LLM Efficiency</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Decoherence of spatial superpositions along stationary worldlines</title><link>http://arxiv.org/abs/2605.13677v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13677v1</guid><description>Clemens Jakubec et al. — arxiv:2605.13677 — LLM Efficiency</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Locale-Conditioned Few-Shot Prompting Mitigates Demonstration Regurgitation in On-Device PII Substitution with Small Language Models</title><link>http://arxiv.org/abs/2605.13538v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13538v1</guid><description>Anuj Sadani et al. — arxiv:2605.13538 — LLM Efficiency</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>ArcVQ-VAE: A Spherical Vector Quantization Framework with ArcCosine Additive Margin</title><link>http://arxiv.org/abs/2605.13517v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13517v1</guid><description>Jaeyung Kim et al. — arxiv:2605.13517 — LLM Efficiency</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>TurboGR: An Accelerated Training System for Large-Scale Generative Recommendation</title><link>http://arxiv.org/abs/2605.13433v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13433v1</guid><description>Huichao Chai et al. — arxiv:2605.13433 — LLM Efficiency</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Vector-Quantized Discrete Latent Factors Meet Financial Priors: Dynamic Cross-Sectional Stock Ranking Prediction for Portfolio Construction</title><link>http://arxiv.org/abs/2605.13407v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13407v1</guid><description>Namhyoung Kim et al. — arxiv:2605.13407 — LLM Efficiency</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>RotVLA: Rotational Latent Action for Vision-Language-Action Model</title><link>http://arxiv.org/abs/2605.13403v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13403v1</guid><description>Qiwei Li et al. — arxiv:2605.13403 — LLM Efficiency</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>PRISM-X: Experiments on Personalised Fine-Tuning with Human and Simulated Users</title><link>http://arxiv.org/abs/2605.13307v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13307v1</guid><description>Hannah Rose Kirk et al. — arxiv:2605.13307 — Alignment</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Not Just RLHF: Why Alignment Alone Won&apos;t Fix Multi-Agent Sycophancy</title><link>http://arxiv.org/abs/2605.12991v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12991v1</guid><description>Adarsh Kumarappan et al. — arxiv:2605.12991 — Alignment</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>EVA-Bench: A New End-to-end Framework for Evaluating Voice Agents</title><link>http://arxiv.org/abs/2605.13841v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13841v1</guid><description>Tara Bogavelli et al. — arxiv:2605.13841 — Hallucination</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Negation Neglect: When models fail to learn negations in training</title><link>http://arxiv.org/abs/2605.13829v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13829v1</guid><description>Harry Mayne et al. — arxiv:2605.13829 — Hallucination</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>RoboEvolve: Co-Evolving Planner-Simulator for Robotic Manipulation with Limited Data</title><link>http://arxiv.org/abs/2605.13775v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13775v1</guid><description>Harold Haodong Chen et al. — arxiv:2605.13775 — Hallucination</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Where Does Reasoning Break? Step-Level Hallucination Detection via Hidden-State Transport Geometry</title><link>http://arxiv.org/abs/2605.13772v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13772v1</guid><description>Tyler Alvarez et al. — arxiv:2605.13772 — Hallucination</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>SceneGraphVLM: Dynamic Scene Graph Generation from Video with Vision-Language Models</title><link>http://arxiv.org/abs/2605.13667v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13667v1</guid><description>Vladislav Makarov et al. — arxiv:2605.13667 — Hallucination</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>PersonalAI 2.0: Enhancing knowledge graph traversal/retrieval with planning mechanism for Personalized LLM Agents</title><link>http://arxiv.org/abs/2605.13481v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13481v1</guid><description>Mikhail Menschikov et al. — arxiv:2605.13481 — Hallucination</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>FIND: Toward Multimodal Financial Reasoning and Question Answering for Indic Languages</title><link>http://arxiv.org/abs/2605.13330v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13330v1</guid><description>Sarmistha Das et al. — arxiv:2605.13330 — Hallucination</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Twisted Alexander vanishing groups of knots</title><link>http://arxiv.org/abs/2605.13291v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13291v1</guid><description>Katsumi Ishikawa et al. — arxiv:2605.13291 — Hallucination</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Chem-GMNet: A Sphere-Native Geometric Transformer for Molecular Property Prediction</title><link>http://arxiv.org/abs/2605.13262v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13262v1</guid><description>Deepak Warrier et al. — arxiv:2605.13262 — Hallucination</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>GeoBuildBench: A Benchmark for Interactive and Executable Geometry Construction from Natural Language</title><link>http://arxiv.org/abs/2605.13167v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13167v1</guid><description>Jinwoong Kim et al. — arxiv:2605.13167 — Hallucination</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Quantitative Linear Logic for Neuro-Symbolic Learning and Verification</title><link>http://arxiv.org/abs/2605.13845v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13845v2</guid><description>Thomas Flinkow et al. — arxiv:2605.13845 — LLM Safety</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Model-Agnostic Lifelong LLM Safety via Externalized Attack-Defense Co-Evolution</title><link>http://arxiv.org/abs/2605.13411v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13411v1</guid><description>Xiaozhe Zhang et al. — arxiv:2605.13411 — LLM Safety</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Backbone is All You Need: Assessing Vulnerabilities of Frozen Foundation Models in Synthetic Image Forensics</title><link>http://arxiv.org/abs/2605.13381v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13381v1</guid><description>Chiara Musso et al. — arxiv:2605.13381 — LLM Safety</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Hierarchical Attacks for Multi-Modal Multi-Agent Reasoning</title><link>http://arxiv.org/abs/2605.13213v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13213v1</guid><description>Hao Zhou et al. — arxiv:2605.13213 — LLM Safety</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Finding the Weakest Link: Adversarial Attack against Multi-Agent Communications</title><link>http://arxiv.org/abs/2605.13170v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13170v1</guid><description>Maxwell Standen et al. — arxiv:2605.13170 — LLM Safety</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Adaptive Steering and Remasking for Safe Generation in Diffusion Language Models</title><link>http://arxiv.org/abs/2605.13043v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13043v1</guid><description>Yejin Lee et al. — arxiv:2605.13043 — LLM Safety</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Quantifying LLM Safety Degradation Under Repeated Attacks Using Survival Analysis</title><link>http://arxiv.org/abs/2605.12869v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12869v1</guid><description>Zvi Topol et al. — arxiv:2605.12869 — LLM Safety</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>AgentTrap: Measuring Runtime Trust Failures in Third-Party Agent Skills</title><link>http://arxiv.org/abs/2605.13940v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13940v1</guid><description>Haomin Zhuang et al. — arxiv:2605.13940 — LLM Safety</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>RTLC -- Research, Teach-to-Learn, Critique: A three-stage prompting paradigm inspired by the Feynman Learning Technique that lifts LLM-as-judge accuracy on JudgeBench with no fine-tuning</title><link>http://arxiv.org/abs/2605.13695v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13695v1</guid><description>Andrea Morandi et al. — arxiv:2605.13695 — LLM Evaluation</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Creativity Bias: How Machine Evaluation Struggles with Creativity in Literary Translations</title><link>http://arxiv.org/abs/2605.13596v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13596v1</guid><description>Kyo Gerrits et al. — arxiv:2605.13596 — LLM Evaluation</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Discovery of Hidden Miscalibration Regimes</title><link>http://arxiv.org/abs/2605.13484v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13484v1</guid><description>Katarzyna Kobalczyk et al. — arxiv:2605.13484 — LLM Evaluation</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>PersonalAI 2.0: Enhancing knowledge graph traversal/retrieval with planning mechanism for Personalized LLM Agents</title><link>http://arxiv.org/abs/2605.13481v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13481v1</guid><description>Mikhail Menschikov et al. — arxiv:2605.13481 — LLM Evaluation</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>From Rosetta to Match-Up: A Paired Corpus of Linguistic Puzzles with Human and LLM Benchmarks</title><link>http://arxiv.org/abs/2605.13408v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13408v1</guid><description>Neh Majmudar et al. — arxiv:2605.13408 — LLM Evaluation</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>LLM-Based Persuasion Enables Guardrail Override in Frontier LLMs</title><link>http://arxiv.org/abs/2605.13334v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13334v1</guid><description>Rodrigo Nogueira et al. — arxiv:2605.13334 — LLM Evaluation</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>VERA-MH: Validation of Ethical and Responsible AI in Mental Health</title><link>http://arxiv.org/abs/2605.13318v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13318v1</guid><description>Luca Belli et al. — arxiv:2605.13318 — LLM Evaluation</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>SWE-Cycle: Benchmarking Code Agents across the Complete Issue Resolution Cycle</title><link>http://arxiv.org/abs/2605.13139v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13139v1</guid><description>Hao Guan et al. — arxiv:2605.13139 — LLM Evaluation</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Multimodal Hidden Markov Models for Persistent Emotional State Tracking</title><link>http://arxiv.org/abs/2605.12838v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12838v1</guid><description>Anamika Ragu et al. — arxiv:2605.12838 — LLM Evaluation</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>HLS-Seek: QoR-Aware Code Generation for High-Level Synthesis via Proxy Comparative Reward Reinforcement Learning</title><link>http://arxiv.org/abs/2605.13536v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13536v1</guid><description>Qingyun Zou et al. — arxiv:2605.13536 — Code LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>TRIAGE: Evaluating Prospective Metacognitive Control in LLMs under Resource Constraints</title><link>http://arxiv.org/abs/2605.13414v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13414v1</guid><description>Zabir Al Nazi et al. — arxiv:2605.13414 — Code LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>AI Harness Engineering: A Runtime Substrate for Foundation-Model Software Agents</title><link>http://arxiv.org/abs/2605.13357v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13357v1</guid><description>Hailin Zhong et al. — arxiv:2605.13357 — Code LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>The Readability Spectrum: Patterns, Issues, and Prompt Effects in LLM-Generated Code</title><link>http://arxiv.org/abs/2605.13280v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13280v1</guid><description>Hengzhi Ye et al. — arxiv:2605.13280 — Code LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>UIBenchKit: A unified toolkit for design-to-code model evaluation</title><link>http://arxiv.org/abs/2605.13141v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13141v1</guid><description>Chinh T. Le et al. — arxiv:2605.13141 — Code LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Protocol-Driven Development: Governing Generated Software Through Invariants and Evidence</title><link>http://arxiv.org/abs/2605.12981v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12981v1</guid><description>Jun He et al. — arxiv:2605.12981 — Code LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Retrieval is Cheap, Show Me the Code: Executable Multi-Hop Reasoning for Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.12975v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12975v1</guid><description>Jiashuo Sun et al. — arxiv:2605.12975 — Code LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>ChipMATE: Multi-Agent Training via Reinforcement Learning for Enhanced RTL Generation</title><link>http://arxiv.org/abs/2605.12857v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12857v1</guid><description>Zhongkai Yu et al. — arxiv:2605.12857 — Code LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>The KnotMosaics Package for SageMath</title><link>http://arxiv.org/abs/2605.14189v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.14189v1</guid><description>Mary Y. Deng et al. — arxiv:2605.14189 — Code LLM</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>LLMs as annotators of credibility assessment in Danish asylum decisions: evaluating classification performance and errors beyond aggregated metrics</title><link>http://arxiv.org/abs/2605.13412v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13412v1</guid><description>Galadrielle Humblot-Renaux et al. — arxiv:2605.13412 — Legal NLP</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Legal NLP</category></item><item><title>Granite Embedding Multilingual R2 Models</title><link>http://arxiv.org/abs/2605.13521v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13521v1</guid><description>Parul Awasthy et al. — arxiv:2605.13521 — Multilingual NLP</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Improving Code Translation with Syntax-Guided and Semantic-aware Preference Optimization</title><link>http://arxiv.org/abs/2605.13229v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13229v1</guid><description>Yuhan Wu et al. — arxiv:2605.13229 — Multilingual NLP</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Vividh-ASR: A Complexity-Tiered Benchmark and Optimization Dynamics for Robust Indic Speech Recognition</title><link>http://arxiv.org/abs/2605.13087v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13087v1</guid><description>Kush Juvekar et al. — arxiv:2605.13087 — Multilingual NLP</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>DiM\textsuperscript{3}: Bridging Multilingual and Multimodal Models via Direction- and Magnitude-Aware Merging</title><link>http://arxiv.org/abs/2605.12960v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12960v1</guid><description>Zijing Wang et al. — arxiv:2605.12960 — Multilingual NLP</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Locale-Conditioned Few-Shot Prompting Mitigates Demonstration Regurgitation in On-Device PII Substitution with Small Language Models</title><link>http://arxiv.org/abs/2605.13538v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13538v1</guid><description>Anuj Sadani et al. — arxiv:2605.13538 — Named Entity Recognition</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Named Entity Recognition</category></item><item><title>Multimodal Graph-based Classification of Esophageal Motility Disorders</title><link>http://arxiv.org/abs/2605.13623v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13623v1</guid><description>Alexander Geiger et al. — arxiv:2605.13623 — Information Extraction</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>LLMs as annotators of credibility assessment in Danish asylum decisions: evaluating classification performance and errors beyond aggregated metrics</title><link>http://arxiv.org/abs/2605.13412v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13412v1</guid><description>Galadrielle Humblot-Renaux et al. — arxiv:2605.13412 — Text Classification</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>Neurosymbolic Auditing of Natural-Language Software Requirements</title><link>http://arxiv.org/abs/2605.13817v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13817v1</guid><description>Bethel Hall et al. — arxiv:2605.13817 — Question Answering</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Scaling Retrieval-Augmented Reasoning with Parallel Search and Explicit Merging</title><link>http://arxiv.org/abs/2605.13534v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13534v1</guid><description>Jiabei Liu et al. — arxiv:2605.13534 — Question Answering</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>FIND: Toward Multimodal Financial Reasoning and Question Answering for Indic Languages</title><link>http://arxiv.org/abs/2605.13330v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13330v1</guid><description>Sarmistha Das et al. — arxiv:2605.13330 — Question Answering</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>CANTANTE: Optimizing Agentic Systems via Contrastive Credit Attribution</title><link>http://arxiv.org/abs/2605.13295v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13295v1</guid><description>Tom Zehle et al. — arxiv:2605.13295 — Question Answering</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>IndicMedDialog: A Parallel Multi-Turn Medical Dialogue Dataset for Accessible Healthcare in Indic Languages</title><link>http://arxiv.org/abs/2605.13292v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13292v1</guid><description>Shubham Kumar Nigam et al. — arxiv:2605.13292 — Question Answering</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>ReTool-Video: Recursive Tool-Using Video Agents with Meta-Augmented Tool Grounding</title><link>http://arxiv.org/abs/2605.13228v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13228v1</guid><description>Xiao Liu et al. — arxiv:2605.13228 — Question Answering</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Skill-Aligned Annotation for Reliable Evaluation in Text-to-Image Generation</title><link>http://arxiv.org/abs/2605.13223v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13223v1</guid><description>Abdelrahman Eldesokey et al. — arxiv:2605.13223 — Question Answering</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>AcquisitionSynthesis: Targeted Data Generation using Acquisition Functions</title><link>http://arxiv.org/abs/2605.13149v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13149v1</guid><description>Ishika Agarwal et al. — arxiv:2605.13149 — Question Answering</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>F-GRPO: Factorized Group-Relative Policy Optimization for Unified Candidate Generation and Ranking</title><link>http://arxiv.org/abs/2605.12995v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12995v1</guid><description>Rohan Surana et al. — arxiv:2605.12995 — Question Answering</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Retrieval is Cheap, Show Me the Code: Executable Multi-Hop Reasoning for Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.12975v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12975v1</guid><description>Jiashuo Sun et al. — arxiv:2605.12975 — Question Answering</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>PersonalAI 2.0: Enhancing knowledge graph traversal/retrieval with planning mechanism for Personalized LLM Agents</title><link>http://arxiv.org/abs/2605.13481v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13481v1</guid><description>Mikhail Menschikov et al. — arxiv:2605.13481 — Knowledge Graph</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>IdeaForge: A Knowledge Graph-Grounded Multi-Agent Framework for Cross-Methodology Innovation Analysis and Patent Claim Generation</title><link>http://arxiv.org/abs/2605.13311v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13311v1</guid><description>Joy Bose et al. — arxiv:2605.13311 — Knowledge Graph</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>SemRepo: A Knowledge Graph for Research Software and Its Scholarly Ecosystem</title><link>http://arxiv.org/abs/2605.13310v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13310v1</guid><description>Abdul Rafay et al. — arxiv:2605.13310 — Knowledge Graph</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Strikingness-Aware Evaluation for Temporal Knowledge Graph Reasoning</title><link>http://arxiv.org/abs/2605.13153v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.13153v1</guid><description>Rikui Huang et al. — arxiv:2605.13153 — Knowledge Graph</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>CommonWhy: A Dataset for Evaluating Entity-Based Causal Commonsense Reasoning in Large Language Models</title><link>http://arxiv.org/abs/2605.12918v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12918v1</guid><description>Armin Toroghi et al. — arxiv:2605.12918 — Knowledge Graph</description><pubDate>Wed, 13 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Mind the Pause: Disfluency-Aware Objective Tuning for Multilingual Speech Correction with LLMs</title><link>http://arxiv.org/abs/2605.12242v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12242v1</guid><description>Deepak Kumar et al. — arxiv:2605.12242 — NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Mechanistic Interpretability of ASR models using Sparse Autoencoders</title><link>http://arxiv.org/abs/2605.12225v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12225v1</guid><description>Dan Pluth et al. — arxiv:2605.12225 — NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>A microservices-based endpoint monitoring platform with predictive NLP models for real-time security and hate-speech risk alerting</title><link>http://arxiv.org/abs/2605.11997v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11997v2</guid><description>Darlan Noetzold et al. — arxiv:2605.11997 — NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Rethinking Positional Encoding for Neural Vehicle Routing</title><link>http://arxiv.org/abs/2605.11910v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11910v1</guid><description>Chuanbo Hua et al. — arxiv:2605.11910 — NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>GAR: Carbon-Aware Routing for LLM Inference via Constrained Optimization</title><link>http://arxiv.org/abs/2605.11603v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11603v1</guid><description>Disha Sheshanarayana et al. — arxiv:2605.11603 — NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>String Diagrams for Quantum Foundations, Computing and Natural Language Processing</title><link>http://arxiv.org/abs/2605.11417v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11417v1</guid><description>Muhammad Hamza Waseem et al. — arxiv:2605.11417 — NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Covering Human Action Space for Computer Use: Data Synthesis and Benchmark</title><link>http://arxiv.org/abs/2605.12501v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12501v1</guid><description>Miaosen Zhang et al. — arxiv:2605.12501 — LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>AlphaGRPO: Unlocking Self-Reflective Multimodal Generation in UMMs via Decompositional Verifiable Reward</title><link>http://arxiv.org/abs/2605.12495v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12495v1</guid><description>Runhui Huang et al. — arxiv:2605.12495 — LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Pion: A Spectrum-Preserving Optimizer via Orthogonal Equivalence Transformation</title><link>http://arxiv.org/abs/2605.12492v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12492v1</guid><description>Kexuan Shi et al. — arxiv:2605.12492 — LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Task-Adaptive Embedding Refinement via Test-time LLM Guidance</title><link>http://arxiv.org/abs/2605.12487v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12487v1</guid><description>Ariel Gera et al. — arxiv:2605.12487 — LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Learning, Fast and Slow: Towards LLMs That Adapt Continually</title><link>http://arxiv.org/abs/2605.12484v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12484v1</guid><description>Rishabh Tiwari et al. — arxiv:2605.12484 — LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>MEME: Multi-entity &amp; Evolving Memory Evaluation</title><link>http://arxiv.org/abs/2605.12477v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12477v1</guid><description>Seokwon Jung et al. — arxiv:2605.12477 — LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Multi-Stream LLMs: Unblocking Language Models with Parallel Streams of Thoughts, Inputs and Outputs</title><link>http://arxiv.org/abs/2605.12460v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12460v1</guid><description>Guinan Su et al. — arxiv:2605.12460 — LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>TextSeal: A Localized LLM Watermark for Provenance &amp; Distillation Protection</title><link>http://arxiv.org/abs/2605.12456v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12456v1</guid><description>Tom Sander et al. — arxiv:2605.12456 — LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>The Algorithmic Caricature: Auditing LLM-Generated Political Discourse Across Crisis Events</title><link>http://arxiv.org/abs/2605.12452v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12452v1</guid><description>Gunjan et al. — arxiv:2605.12452 — LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>LychSim: A Controllable and Interactive Simulation Framework for Vision Research</title><link>http://arxiv.org/abs/2605.12449v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12449v1</guid><description>Wufei Ma et al. — arxiv:2605.12449 — LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Covering Human Action Space for Computer Use: Data Synthesis and Benchmark</title><link>http://arxiv.org/abs/2605.12501v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12501v1</guid><description>Miaosen Zhang et al. — arxiv:2605.12501 — LLM Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>SenseNova-U1: Unifying Multimodal Understanding and Generation with NEO-unify Architecture</title><link>http://arxiv.org/abs/2605.12500v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12500v1</guid><description>Haiwen Diao et al. — arxiv:2605.12500 — LLM Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>From Web to Pixels: Bringing Agentic Search into Visual Perception</title><link>http://arxiv.org/abs/2605.12497v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12497v1</guid><description>Bokang Yang et al. — arxiv:2605.12497 — LLM Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>LongMemEval-V2: Evaluating Long-Term Agent Memory Toward Experienced Colleagues</title><link>http://arxiv.org/abs/2605.12493v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12493v1</guid><description>Di Wu et al. — arxiv:2605.12493 — LLM Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Letting the neural code speak: Automated characterization of monkey visual neurons through human language</title><link>http://arxiv.org/abs/2605.12485v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12485v1</guid><description>Vedang Lad et al. — arxiv:2605.12485 — LLM Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>ToolCUA: Towards Optimal GUI-Tool Path Orchestration for Computer Use Agents</title><link>http://arxiv.org/abs/2605.12481v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12481v1</guid><description>Xuhao Hu et al. — arxiv:2605.12481 — LLM Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>MEME: Multi-entity &amp; Evolving Memory Evaluation</title><link>http://arxiv.org/abs/2605.12477v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12477v1</guid><description>Seokwon Jung et al. — arxiv:2605.12477 — LLM Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>KV-Fold: One-Step KV-Cache Recurrence for Long-Context Inference</title><link>http://arxiv.org/abs/2605.12471v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12471v1</guid><description>Alireza Nadali et al. — arxiv:2605.12471 — LLM Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Multi-Stream LLMs: Unblocking Language Models with Parallel Streams of Thoughts, Inputs and Outputs</title><link>http://arxiv.org/abs/2605.12460v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12460v1</guid><description>Guinan Su et al. — arxiv:2605.12460 — LLM Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>LychSim: A Controllable and Interactive Simulation Framework for Vision Research</title><link>http://arxiv.org/abs/2605.12449v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12449v1</guid><description>Wufei Ma et al. — arxiv:2605.12449 — LLM Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>From Web to Pixels: Bringing Agentic Search into Visual Perception</title><link>http://arxiv.org/abs/2605.12497v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12497v1</guid><description>Bokang Yang et al. — arxiv:2605.12497 — Multi-Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>MEME: Multi-entity &amp; Evolving Memory Evaluation</title><link>http://arxiv.org/abs/2605.12477v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12477v1</guid><description>Seokwon Jung et al. — arxiv:2605.12477 — Multi-Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>KV-Fold: One-Step KV-Cache Recurrence for Long-Context Inference</title><link>http://arxiv.org/abs/2605.12471v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12471v1</guid><description>Alireza Nadali et al. — arxiv:2605.12471 — Multi-Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Events as Triggers for Behavioral Diversity in Multi-Agent Reinforcement Learning</title><link>http://arxiv.org/abs/2605.12388v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12388v1</guid><description>Hannes Büchi et al. — arxiv:2605.12388 — Multi-Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>ProfiliTable: Profiling-Driven Tabular Data Processing via Agentic Workflows</title><link>http://arxiv.org/abs/2605.12376v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12376v1</guid><description>Wei Liu et al. — arxiv:2605.12376 — Multi-Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>LISA: Cognitive Arbitration for Signal-Free Autonomous Intersection Management</title><link>http://arxiv.org/abs/2605.12321v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12321v1</guid><description>Abderrahmane Lakas et al. — arxiv:2605.12321 — Multi-Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Executable Agentic Memory for GUI Agent</title><link>http://arxiv.org/abs/2605.12294v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12294v1</guid><description>Zerui Qin et al. — arxiv:2605.12294 — Multi-Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Iterative Audit Convergence in LLM-Managed Multi-Agent Systems: A Case Study in Prompt Engineering Quality Assurance</title><link>http://arxiv.org/abs/2605.12280v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12280v1</guid><description>Elias Calboreanu et al. — arxiv:2605.12280 — Multi-Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>No Action Without a NOD: A Heterogeneous Multi-Agent Architecture for Reliable Service Agents</title><link>http://arxiv.org/abs/2605.12240v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12240v1</guid><description>Zixu Yang et al. — arxiv:2605.12240 — Multi-Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Goal-Oriented Reasoning for RAG-based Memory in Conversational Agentic LLM Systems</title><link>http://arxiv.org/abs/2605.12213v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12213v1</guid><description>Jiazhou Liang et al. — arxiv:2605.12213 — Multi-Agent</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>LongMemEval-V2: Evaluating Long-Term Agent Memory Toward Experienced Colleagues</title><link>http://arxiv.org/abs/2605.12493v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12493v1</guid><description>Di Wu et al. — arxiv:2605.12493 — RAG</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Overview of the MedHopQA track at BioCreative IX: track description, participation and evaluation of systems for multi-hop medical question answering</title><link>http://arxiv.org/abs/2605.12313v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12313v1</guid><description>Rezarta Islamaj et al. — arxiv:2605.12313 — RAG</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Goal-Oriented Reasoning for RAG-based Memory in Conversational Agentic LLM Systems</title><link>http://arxiv.org/abs/2605.12213v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12213v1</guid><description>Jiazhou Liang et al. — arxiv:2605.12213 — RAG</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>SAGE: A Self-Evolving Agentic Graph-Memory Engine for Structure-Aware Associative Memory</title><link>http://arxiv.org/abs/2605.12061v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12061v1</guid><description>Juntong Wang et al. — arxiv:2605.12061 — RAG</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>LegalCheck: Retrieval- and Context-Augmented Generation for Drafting Municipal Legal Advice Letters</title><link>http://arxiv.org/abs/2605.12012v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12012v1</guid><description>Virgill van der Meer et al. — arxiv:2605.12012 — RAG</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Towards Order Fairness: Mitigating LLMs Order Sensitivity through Dual Group Advantage Optimization</title><link>http://arxiv.org/abs/2605.11974v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11974v1</guid><description>Xu Chu et al. — arxiv:2605.11974 — RAG</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Very Efficient Listwise Multimodal Reranking for Long Documents</title><link>http://arxiv.org/abs/2605.11864v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11864v1</guid><description>Yiqun Sun et al. — arxiv:2605.11864 — RAG</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Persistent and Conversational Multi-Method Explainability for Trustworthy Financial AI</title><link>http://arxiv.org/abs/2605.11687v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11687v1</guid><description>Georgios Makridis et al. — arxiv:2605.11687 — RAG</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>CuSearch: Curriculum Rollout Sampling via Search Depth for Agentic RAG</title><link>http://arxiv.org/abs/2605.11611v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11611v1</guid><description>Jianghan Shen et al. — arxiv:2605.11611 — RAG</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Agents Should Replace Narrow Predictive AI as the Orchestrator in 6G AI-RAN</title><link>http://arxiv.org/abs/2605.11516v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11516v1</guid><description>Pranshav Gajjar et al. — arxiv:2605.11516 — RAG</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Multi-Stream LLMs: Unblocking Language Models with Parallel Streams of Thoughts, Inputs and Outputs</title><link>http://arxiv.org/abs/2605.12460v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12460v1</guid><description>Guinan Su et al. — arxiv:2605.12460 — Reasoning</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Geometric Factual Recall in Transformers</title><link>http://arxiv.org/abs/2605.12426v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12426v1</guid><description>Shauli Ravfogel et al. — arxiv:2605.12426 — Reasoning</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>OGLS-SD: On-Policy Self-Distillation with Outcome-Guided Logit Steering for LLM Reasoning</title><link>http://arxiv.org/abs/2605.12400v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12400v1</guid><description>Yuxiao Yang et al. — arxiv:2605.12400 — Reasoning</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Scalable Token-Level Hallucination Detection in Large Language Models</title><link>http://arxiv.org/abs/2605.12384v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12384v1</guid><description>Rui Min et al. — arxiv:2605.12384 — Reasoning</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Self-Consistent Latent Reasoning: Long Latent Sequence Reasoning for Vision-Language Model</title><link>http://arxiv.org/abs/2605.12163v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12163v1</guid><description>Chenfeng Wang et al. — arxiv:2605.12163 — Reasoning</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>To Whom Do Language Models Align? Measuring Principal Hierarchies Under High-Stakes Competing Demands</title><link>http://arxiv.org/abs/2605.12120v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12120v1</guid><description>Fangyi Yu et al. — arxiv:2605.12120 — Reasoning</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Intermediate Artifacts as First-Class Citizens: A Data Model for Durable Intermediate Artifacts in Agentic Systems</title><link>http://arxiv.org/abs/2605.12087v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12087v1</guid><description>Josh Rosen et al. — arxiv:2605.12087 — Reasoning</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>On the Limitations of Large Language Models for Conceptual Database Modeling</title><link>http://arxiv.org/abs/2605.11986v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11986v1</guid><description>Arthur F. Siqueira et al. — arxiv:2605.11986 — Reasoning</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>From Noise to Diversity: Random Embedding Injection in LLM Reasoning</title><link>http://arxiv.org/abs/2605.11936v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11936v1</guid><description>Heejun Kim et al. — arxiv:2605.11936 — Reasoning</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Procedural-skill SFT across capacity tiers: A W-Shaped pre-SFT Trajectory and Regime-Asymmetric Mechanism on 0.8B-4B Qwen3.5 Models</title><link>http://arxiv.org/abs/2605.11907v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11907v1</guid><description>Igor Strozzi et al. — arxiv:2605.11907 — Reasoning</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>ToolCUA: Towards Optimal GUI-Tool Path Orchestration for Computer Use Agents</title><link>http://arxiv.org/abs/2605.12481v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12481v1</guid><description>Xuhao Hu et al. — arxiv:2605.12481 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Rollout Cards: A Reproducibility Standard for Agent Research</title><link>http://arxiv.org/abs/2605.12131v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12131v1</guid><description>Charlie Masters et al. — arxiv:2605.12131 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Property-Level Reconstructability of Agent Decisions: An Anchor-Level Pilot Across Vendor SDK Adapter Regimes</title><link>http://arxiv.org/abs/2605.12078v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12078v1</guid><description>Oleg Solozobov et al. — arxiv:2605.12078 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>The SiMPL Method for Multi-Material Topology Optimization</title><link>http://arxiv.org/abs/2605.11994v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11994v1</guid><description>Peter Gangl et al. — arxiv:2605.11994 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>When Simulation Lies: A Sim-to-Real Benchmark and Domain-Randomized RL Recipe for Tool-Use Agents</title><link>http://arxiv.org/abs/2605.11928v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11928v1</guid><description>Xiaolin Zhou et al. — arxiv:2605.11928 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>On-Policy Self-Evolution via Failure Trajectories for Agentic Safety Alignment</title><link>http://arxiv.org/abs/2605.11882v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11882v1</guid><description>Bo Yin et al. — arxiv:2605.11882 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>GEAR: Granularity-Adaptive Advantage Reweighting for LLM Agents via Self-Distillation</title><link>http://arxiv.org/abs/2605.11853v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11853v1</guid><description>Sijia Li et al. — arxiv:2605.11853 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Can LLM Agents Respond to Disasters? Benchmarking Heterogeneous Geospatial Reasoning in Emergency Operations</title><link>http://arxiv.org/abs/2605.11633v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11633v1</guid><description>Junjue Wang et al. — arxiv:2605.11633 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>From Generic Correlation to Input-Specific Credit in On-Policy Self Distillation</title><link>http://arxiv.org/abs/2605.11613v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11613v1</guid><description>Guobin Shen et al. — arxiv:2605.11613 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Decaf: Improving Neural Decompilation with Automatic Feedback and Search</title><link>http://arxiv.org/abs/2605.11501v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11501v1</guid><description>Alexander Shypula et al. — arxiv:2605.11501 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Large Language Models for Agentic NetOps and AIOps: Architectures, Evaluation, and Safety</title><link>http://arxiv.org/abs/2605.12729v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12729v1</guid><description>Muhammad Bilal et al. — arxiv:2605.12729 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Multi-Rollout On-Policy Distillation via Peer Successes and Failures</title><link>http://arxiv.org/abs/2605.12652v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12652v1</guid><description>Weichen Yu et al. — arxiv:2605.12652 — Tool Use</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>SenseNova-U1: Unifying Multimodal Understanding and Generation with NEO-unify Architecture</title><link>http://arxiv.org/abs/2605.12500v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12500v1</guid><description>Haiwen Diao et al. — arxiv:2605.12500 — Multimodal LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>AlphaGRPO: Unlocking Self-Reflective Multimodal Generation in UMMs via Decompositional Verifiable Reward</title><link>http://arxiv.org/abs/2605.12495v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12495v1</guid><description>Runhui Huang et al. — arxiv:2605.12495 — Multimodal LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Beyond Localization: A Comprehensive Diagnosis of Perspective-Conditioned Spatial Reasoning in MLLMs from Omnidirectional Images</title><link>http://arxiv.org/abs/2605.12413v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12413v1</guid><description>Yuangong Chen et al. — arxiv:2605.12413 — Multimodal LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Fill the GAP: A Granular Alignment Paradigm for Visual Reasoning in Multimodal Large Language Models</title><link>http://arxiv.org/abs/2605.12374v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12374v1</guid><description>Yanting Miao et al. — arxiv:2605.12374 — Multimodal LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>GuidedVLA: Specifying Task-Relevant Factors via Plug-and-Play Action Attention Specialization</title><link>http://arxiv.org/abs/2605.12369v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12369v1</guid><description>Xiaosong Jia et al. — arxiv:2605.12369 — Multimodal LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Reinforcing VLAs in Task-Agnostic World Models</title><link>http://arxiv.org/abs/2605.12334v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12334v1</guid><description>Yucen Wang et al. — arxiv:2605.12334 — Multimodal LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Towards Automated Air Traffic Safety Assessment Around Non-Towered Airports Using Large Language Models</title><link>http://arxiv.org/abs/2605.12332v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12332v1</guid><description>Torsten Darrell et al. — arxiv:2605.12332 — Multimodal LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>G$^2$TR: Generation-Guided Visual Token Reduction for Separate-Encoder Unified Multimodal Models</title><link>http://arxiv.org/abs/2605.12309v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12309v1</guid><description>Junxian Li et al. — arxiv:2605.12309 — Multimodal LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Images in Sentences: Scaling Interleaved Instructions for Unified Visual Generation</title><link>http://arxiv.org/abs/2605.12305v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12305v1</guid><description>Yabo Zhang et al. — arxiv:2605.12305 — Multimodal LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Large-Small Model Collaboration for Farmland Semantic Change Detection</title><link>http://arxiv.org/abs/2605.12282v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12282v1</guid><description>Xinjia Li et al. — arxiv:2605.12282 — Multimodal LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>CausalCine: Real-Time Autoregressive Generation for Multi-Shot Video Narratives</title><link>http://arxiv.org/abs/2605.12496v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12496v1</guid><description>Yihao Meng et al. — arxiv:2605.12496 — Long Context</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>LongMemEval-V2: Evaluating Long-Term Agent Memory Toward Experienced Colleagues</title><link>http://arxiv.org/abs/2605.12493v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12493v1</guid><description>Di Wu et al. — arxiv:2605.12493 — Long Context</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>KV-Fold: One-Step KV-Cache Recurrence for Long-Context Inference</title><link>http://arxiv.org/abs/2605.12471v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12471v1</guid><description>Alireza Nadali et al. — arxiv:2605.12471 — Long Context</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Classifier Context Rot: Monitor Performance Degrades with Context Length</title><link>http://arxiv.org/abs/2605.12366v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12366v1</guid><description>Sam Martin et al. — arxiv:2605.12366 — Long Context</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>$δ$-mem: Efficient Online Memory for Large Language Models</title><link>http://arxiv.org/abs/2605.12357v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12357v1</guid><description>Jingdi Lei et al. — arxiv:2605.12357 — Long Context</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>EHR-RAGp: Retrieval-Augmented Prototype-Guided Foundation Model for Electronic Health Records</title><link>http://arxiv.org/abs/2605.12335v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12335v1</guid><description>Saeed Shurrab et al. — arxiv:2605.12335 — Long Context</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>PRISM: Pareto-Efficient Retrieval over Intent-Aware Structured Memory for Long-Horizon Agents</title><link>http://arxiv.org/abs/2605.12260v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12260v1</guid><description>Jingyi Peng et al. — arxiv:2605.12260 — Long Context</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>No Action Without a NOD: A Heterogeneous Multi-Agent Architecture for Reliable Service Agents</title><link>http://arxiv.org/abs/2605.12240v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12240v1</guid><description>Zixu Yang et al. — arxiv:2605.12240 — Long Context</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Combining On-Policy Optimization and Distillation for Long-Context Reasoning in Large Language Models</title><link>http://arxiv.org/abs/2605.12227v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12227v1</guid><description>Miguel Moura Ramos et al. — arxiv:2605.12227 — Long Context</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Goal-Oriented Reasoning for RAG-based Memory in Conversational Agentic LLM Systems</title><link>http://arxiv.org/abs/2605.12213v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12213v1</guid><description>Jiazhou Liang et al. — arxiv:2605.12213 — Long Context</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Search Your Block Floating Point Scales!</title><link>http://arxiv.org/abs/2605.12464v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12464v1</guid><description>Tanmaey Gupta et al. — arxiv:2605.12464 — LLM Efficiency</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Equivariant Space Group and Hamiltonian for Collinear Magnetic Systems</title><link>http://arxiv.org/abs/2605.12440v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12440v1</guid><description>Chaoxi Cui et al. — arxiv:2605.12440 — LLM Efficiency</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>NCCLZ: Compression-Enabled GPU Collectives with Decoupled Quantization and Entropy Coding</title><link>http://arxiv.org/abs/2605.12396v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12396v1</guid><description>Jiamin Wang et al. — arxiv:2605.12396 — LLM Efficiency</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Events as Triggers for Behavioral Diversity in Multi-Agent Reinforcement Learning</title><link>http://arxiv.org/abs/2605.12388v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12388v1</guid><description>Hannes Büchi et al. — arxiv:2605.12388 — LLM Efficiency</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Output Composability of QLoRA PEFT Modules for Plug-and-Play Attribute-Controlled Text Generation</title><link>http://arxiv.org/abs/2605.12345v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12345v1</guid><description>Michela Lorandi et al. — arxiv:2605.12345 — LLM Efficiency</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Grid Games: The Power of Multiple Grids for Quantizing Large Language Models</title><link>http://arxiv.org/abs/2605.12327v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12327v1</guid><description>Vage Egiazarian et al. — arxiv:2605.12327 — LLM Efficiency</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>SOAR: Scale Optimization for Accurate Reconstruction in NVFP4 Quantization</title><link>http://arxiv.org/abs/2605.12245v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12245v1</guid><description>Chengzhu Bao et al. — arxiv:2605.12245 — LLM Efficiency</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Neural Network-Based Virtual Wheel-Speed Sensor for Enhanced Low-Velocity State Estimation</title><link>http://arxiv.org/abs/2605.12230v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12230v1</guid><description>Hendrik Schäfke et al. — arxiv:2605.12230 — LLM Efficiency</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Not How Many, But Which: Parameter Placement in Low-Rank Adaptation</title><link>http://arxiv.org/abs/2605.12207v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12207v1</guid><description>Arijit Sehanobish et al. — arxiv:2605.12207 — LLM Efficiency</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Cross-Modal-Domain Generalization Through Semantically Aligned Discrete Representations</title><link>http://arxiv.org/abs/2605.12145v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12145v1</guid><description>Souptik Sen et al. — arxiv:2605.12145 — LLM Efficiency</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Semantic Reward Collapse and the Preservation of Epistemic Integrity in Adaptive AI Systems</title><link>http://arxiv.org/abs/2605.12406v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12406v1</guid><description>William Parris et al. — arxiv:2605.12406 — Alignment</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>TokenRatio: Principled Token-Level Preference Optimization via Ratio Matching</title><link>http://arxiv.org/abs/2605.12288v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12288v2</guid><description>Truong Nguyen et al. — arxiv:2605.12288 — Alignment</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>SyncDPO: Enhancing Temporal Synchronization in Video-Audio Joint Generation via Preference Learning</title><link>http://arxiv.org/abs/2605.12179v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12179v1</guid><description>Xin Cheng et al. — arxiv:2605.12179 — Alignment</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>When Policy Entropy Constraint Fails: Preserving Diversity in Flow-based RLHF via Perceptual Entropy</title><link>http://arxiv.org/abs/2605.12112v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12112v1</guid><description>Xiaofeng Tan et al. — arxiv:2605.12112 — Alignment</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Learn to Think: Improving Multimodal Reasoning through Vision-Aware Self-Improvement Training</title><link>http://arxiv.org/abs/2605.11931v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11931v1</guid><description>Qihuang Zhong et al. — arxiv:2605.11931 — Alignment</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>YFPO: A Preliminary Study of Yoked Feature Preference Optimization with Neuron-Guided Rewards for Mathematical Reasoning</title><link>http://arxiv.org/abs/2605.11906v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11906v1</guid><description>Yifan Le et al. — arxiv:2605.11906 — Alignment</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Variance-aware Reward Modeling with Anchor Guidance</title><link>http://arxiv.org/abs/2605.11865v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11865v1</guid><description>Shuxing Fang et al. — arxiv:2605.11865 — Alignment</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Enhancing Multilingual Counterfactual Generation through Alignment-as-Preference Optimization</title><link>http://arxiv.org/abs/2605.11632v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11632v1</guid><description>Yilong Wang et al. — arxiv:2605.11632 — Alignment</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>DocAtlas: Multilingual Document Understanding Across 80+ Languages</title><link>http://arxiv.org/abs/2605.12623v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12623v1</guid><description>Ahmed Heakl et al. — arxiv:2605.12623 — Alignment</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Letting the neural code speak: Automated characterization of monkey visual neurons through human language</title><link>http://arxiv.org/abs/2605.12485v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12485v1</guid><description>Vedang Lad et al. — arxiv:2605.12485 — Hallucination</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Reward Hacking in Rubric-Based Reinforcement Learning</title><link>http://arxiv.org/abs/2605.12474v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12474v1</guid><description>Anas Mahmoud et al. — arxiv:2605.12474 — Hallucination</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>CAAFC: Chronological Actionable Automated Fact-Checker for misinformation / non-factual hallucination detection and correction</title><link>http://arxiv.org/abs/2605.12436v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12436v1</guid><description>Islam Eldifrawi et al. — arxiv:2605.12436 — Hallucination</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Geometric Factual Recall in Transformers</title><link>http://arxiv.org/abs/2605.12426v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12426v1</guid><description>Shauli Ravfogel et al. — arxiv:2605.12426 — Hallucination</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Semantic Reward Collapse and the Preservation of Epistemic Integrity in Adaptive AI Systems</title><link>http://arxiv.org/abs/2605.12406v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12406v1</guid><description>William Parris et al. — arxiv:2605.12406 — Hallucination</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Scalable Token-Level Hallucination Detection in Large Language Models</title><link>http://arxiv.org/abs/2605.12384v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12384v1</guid><description>Rui Min et al. — arxiv:2605.12384 — Hallucination</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Reinforcing VLAs in Task-Agnostic World Models</title><link>http://arxiv.org/abs/2605.12334v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12334v1</guid><description>Yucen Wang et al. — arxiv:2605.12334 — Hallucination</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>GKnow: Measuring the Entanglement of Gender Bias and Factual Gender</title><link>http://arxiv.org/abs/2605.12299v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12299v1</guid><description>Leonor Veloso et al. — arxiv:2605.12299 — Hallucination</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Instruction Lens Score: Your Instruction Contributes a Powerful Object Hallucination Detector for Multimodal Large Language Models</title><link>http://arxiv.org/abs/2605.12258v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12258v1</guid><description>Runhe Lai et al. — arxiv:2605.12258 — Hallucination</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Why Conclusions Diverge from the Same Observations: Formalizing World-Model Non-Identifiability via an Inference</title><link>http://arxiv.org/abs/2605.12255v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12255v1</guid><description>Toru Takahashi et al. — arxiv:2605.12255 — Hallucination</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Targeted Neuron Modulation via Contrastive Pair Search</title><link>http://arxiv.org/abs/2605.12290v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12290v1</guid><description>Sam Herring et al. — arxiv:2605.12290 — LLM Safety</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Metaphor Is Not All Attention Needs</title><link>http://arxiv.org/abs/2605.12128v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12128v1</guid><description>Olga Sorokoletova et al. — arxiv:2605.12128 — LLM Safety</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Proteus: A Self-Evolving Red Team for Agent Skill Ecosystems</title><link>http://arxiv.org/abs/2605.11891v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11891v1</guid><description>Zhaojiacheng Zhou et al. — arxiv:2605.11891 — LLM Safety</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>IPI-proxy: An Intercepting Proxy for Red-Teaming Web-Browsing AI Agents Against Indirect Prompt Injection</title><link>http://arxiv.org/abs/2605.11868v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11868v1</guid><description>Chia-Pei et al. — arxiv:2605.11868 — LLM Safety</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Persona-Conditioned Adversarial Prompting: Multi-Identity Red-Teaming for Adversarial Discovery and Mitigation</title><link>http://arxiv.org/abs/2605.11730v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11730v1</guid><description>Cristian Morasso et al. — arxiv:2605.11730 — LLM Safety</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>SafeSteer: A Decoding-level Defense Mechanism for Multimodal Large Language Models</title><link>http://arxiv.org/abs/2605.11716v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11716v1</guid><description>Xinyi Zeng et al. — arxiv:2605.11716 — LLM Safety</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Safety Context Injection: Inference-Time Safety Alignment via Static Filtering and Agentic Analysis</title><link>http://arxiv.org/abs/2605.11664v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11664v1</guid><description>Zhenhao Xu et al. — arxiv:2605.11664 — LLM Safety</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>A Mimetic Detector for Adversarial Image Perturbations</title><link>http://arxiv.org/abs/2605.11492v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11492v1</guid><description>Johnny Corbino et al. — arxiv:2605.11492 — LLM Safety</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>REALISTA: Realistic Latent Adversarial Attacks that Elicit LLM Hallucinations</title><link>http://arxiv.org/abs/2605.12813v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12813v1</guid><description>Buyun Liang et al. — arxiv:2605.12813 — LLM Safety</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Still Camouflage, Moving Illusion: View-Induced Trajectory Manipulation in Autonomous Driving</title><link>http://arxiv.org/abs/2605.12743v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12743v1</guid><description>Shuo Ju et al. — arxiv:2605.12743 — LLM Safety</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Before the Last Token: Diagnosing Final-Token Safety Probe Failures</title><link>http://arxiv.org/abs/2605.12726v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12726v1</guid><description>Shravan Doda et al. — arxiv:2605.12726 — LLM Safety</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Predicting Disagreement with Human Raters in LLM-as-a-Judge Difficulty Assessment without Using Generation-Time Probability Signals</title><link>http://arxiv.org/abs/2605.12422v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12422v1</guid><description>Yo Ehara et al. — arxiv:2605.12422 — LLM Evaluation</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>MedHopQA: A Disease-Centered Multi-Hop Reasoning Benchmark and Evaluation Framework for LLM-Based Biomedical Question Answering</title><link>http://arxiv.org/abs/2605.12361v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12361v1</guid><description>Rezarta Islamaj et al. — arxiv:2605.12361 — LLM Evaluation</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>PRISM: Pareto-Efficient Retrieval over Intent-Aware Structured Memory for Long-Horizon Agents</title><link>http://arxiv.org/abs/2605.12260v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12260v1</guid><description>Jingyi Peng et al. — arxiv:2605.12260 — LLM Evaluation</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Procedural-skill SFT across capacity tiers: A W-Shaped pre-SFT Trajectory and Regime-Asymmetric Mechanism on 0.8B-4B Qwen3.5 Models</title><link>http://arxiv.org/abs/2605.11907v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11907v1</guid><description>Igor Strozzi et al. — arxiv:2605.11907 — LLM Evaluation</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Allegory of the Cave: Measurement-Grounded Vision-Language Learning</title><link>http://arxiv.org/abs/2605.11727v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11727v1</guid><description>Kepeng Xu et al. — arxiv:2605.11727 — LLM Evaluation</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Human-Grounded Multimodal Benchmark with 900K-Scale Aggregated Student Response Distributions from Japan&apos;s National Assessment of Academic Ability</title><link>http://arxiv.org/abs/2605.11663v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11663v1</guid><description>Kyosuke Takami et al. — arxiv:2605.11663 — LLM Evaluation</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Read, Grep, and Synthesize: Diagnosing Cross-Domain Seed Exposure for LLM Research Ideation</title><link>http://arxiv.org/abs/2605.11532v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11532v1</guid><description>Yunju Choi et al. — arxiv:2605.11532 — LLM Evaluation</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Neurodata Without Boredom: Benchmarking Agentic AI for Data Reuse</title><link>http://arxiv.org/abs/2605.12808v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12808v1</guid><description>Ling-Qi Zhang et al. — arxiv:2605.12808 — LLM Evaluation</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Scalable Packed Layouts for Vector-Length-Agnostic ML Code Generation</title><link>http://arxiv.org/abs/2605.12445v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12445v1</guid><description>Ege Beysel et al. — arxiv:2605.12445 — Code LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Uncertainty Quantification for LLM-based Code Generation</title><link>http://arxiv.org/abs/2605.12201v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12201v1</guid><description>Senrong Xu et al. — arxiv:2605.12201 — Code LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Rollout Cards: A Reproducibility Standard for Agent Research</title><link>http://arxiv.org/abs/2605.12131v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12131v1</guid><description>Charlie Masters et al. — arxiv:2605.12131 — Code LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>StepCodeReasoner: Aligning Code Reasoning with Stepwise Execution Traces via Reinforcement Learning</title><link>http://arxiv.org/abs/2605.11922v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11922v1</guid><description>Hao Wang et al. — arxiv:2605.11922 — Code LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>AgentDisCo: Towards Disentanglement and Collaboration in Open-ended Deep Research Agents</title><link>http://arxiv.org/abs/2605.11732v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11732v1</guid><description>Jiarui Jin et al. — arxiv:2605.11732 — Code LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>CoT-Guard: Small Models for Strong Monitoring</title><link>http://arxiv.org/abs/2605.12746v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12746v1</guid><description>Nirav Diwan et al. — arxiv:2605.12746 — Code LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>3D Primitives are a Spatial Language for VLMs</title><link>http://arxiv.org/abs/2605.12586v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12586v1</guid><description>Junze Liu et al. — arxiv:2605.12586 — Code LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Towards Fine-Grained Multi-Dimensional Speech Understanding: Data Pipeline, Benchmark, and Model</title><link>http://arxiv.org/abs/2605.12036v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12036v1</guid><description>Guojian Li et al. — arxiv:2605.12036 — Speech LLM</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Speech LLM</category></item><item><title>Poly-SVC: Polyphony-Aware Singing Voice Conversion with Harmonic Modeling</title><link>http://arxiv.org/abs/2605.12310v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12310v1</guid><description>Chen Geng et al. — arxiv:2605.12310 — Multilingual NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Mechanistic Interpretability of ASR models using Sparse Autoencoders</title><link>http://arxiv.org/abs/2605.12225v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12225v1</guid><description>Dan Pluth et al. — arxiv:2605.12225 — Multilingual NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Sign Language Recognition and Translation for Low-Resource Languages: Challenges and Pathways Forward</title><link>http://arxiv.org/abs/2605.12096v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12096v1</guid><description>Nigar Alishzade et al. — arxiv:2605.12096 — Multilingual NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Enhancing Multilingual Counterfactual Generation through Alignment-as-Preference Optimization</title><link>http://arxiv.org/abs/2605.11632v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11632v1</guid><description>Yilong Wang et al. — arxiv:2605.11632 — Multilingual NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Scaling Laws for Mixture Pretraining Under Data Constraints</title><link>http://arxiv.org/abs/2605.12715v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12715v1</guid><description>Anastasiia Sedova et al. — arxiv:2605.12715 — Multilingual NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>DocAtlas: Multilingual Document Understanding Across 80+ Languages</title><link>http://arxiv.org/abs/2605.12623v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12623v1</guid><description>Ahmed Heakl et al. — arxiv:2605.12623 — Multilingual NLP</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Concordance Comparison as a Means of Assembling Local Grammars</title><link>http://arxiv.org/abs/2605.11862v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11862v1</guid><description>Juliana Pirovani et al. — arxiv:2605.11862 — Named Entity Recognition</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Named Entity Recognition</category></item><item><title>Concordance Comparison as a Means of Assembling Local Grammars</title><link>http://arxiv.org/abs/2605.11862v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11862v1</guid><description>Juliana Pirovani et al. — arxiv:2605.11862 — Information Extraction</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>Purification of a monitored qubit: exact path-integral solution</title><link>http://arxiv.org/abs/2605.12783v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12783v1</guid><description>Matheus M. R. Poltronieri Martins et al. — arxiv:2605.12783 — Information Extraction</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>How Useful Is Cross-Domain Generalization for Training LLM Monitors?</title><link>http://arxiv.org/abs/2605.12265v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12265v1</guid><description>Sam Martin et al. — arxiv:2605.12265 — Text Classification</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>A microservices-based endpoint monitoring platform with predictive NLP models for real-time security and hate-speech risk alerting</title><link>http://arxiv.org/abs/2605.11997v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11997v2</guid><description>Darlan Noetzold et al. — arxiv:2605.11997 — Text Classification</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>From Web to Pixels: Bringing Agentic Search into Visual Perception</title><link>http://arxiv.org/abs/2605.12497v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12497v1</guid><description>Bokang Yang et al. — arxiv:2605.12497 — Question Answering</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>LongMemEval-V2: Evaluating Long-Term Agent Memory Toward Experienced Colleagues</title><link>http://arxiv.org/abs/2605.12493v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12493v1</guid><description>Di Wu et al. — arxiv:2605.12493 — Question Answering</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>ORCE: Order-Aware Alignment of Verbalized Confidence in Large Language Models</title><link>http://arxiv.org/abs/2605.12446v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12446v1</guid><description>Chen Li et al. — arxiv:2605.12446 — Question Answering</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Extending QuAK with Nested Quantitative Automata</title><link>http://arxiv.org/abs/2605.12418v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12418v1</guid><description>Thomas A. Henzinger et al. — arxiv:2605.12418 — Question Answering</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Beyond Localization: A Comprehensive Diagnosis of Perspective-Conditioned Spatial Reasoning in MLLMs from Omnidirectional Images</title><link>http://arxiv.org/abs/2605.12413v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12413v1</guid><description>Yuangong Chen et al. — arxiv:2605.12413 — Question Answering</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Question Difficulty Estimation for Large Language Models via Answer Plausibility Scoring</title><link>http://arxiv.org/abs/2605.12398v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12398v1</guid><description>Jamshid Mozafari et al. — arxiv:2605.12398 — Question Answering</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Context Convergence Improves Answering Inferential Questions</title><link>http://arxiv.org/abs/2605.12370v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12370v1</guid><description>Jamshid Mozafari et al. — arxiv:2605.12370 — Question Answering</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>MedHopQA: A Disease-Centered Multi-Hop Reasoning Benchmark and Evaluation Framework for LLM-Based Biomedical Question Answering</title><link>http://arxiv.org/abs/2605.12361v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12361v1</guid><description>Rezarta Islamaj et al. — arxiv:2605.12361 — Question Answering</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Overview of the MedHopQA track at BioCreative IX: track description, participation and evaluation of systems for multi-hop medical question answering</title><link>http://arxiv.org/abs/2605.12313v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12313v1</guid><description>Rezarta Islamaj et al. — arxiv:2605.12313 — Question Answering</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Mitigating Context-Memory Conflicts in LLMs through Dynamic Cognitive Reconciliation Decoding</title><link>http://arxiv.org/abs/2605.12185v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12185v1</guid><description>Yigeng Zhou et al. — arxiv:2605.12185 — Question Answering</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Persistent and Conversational Multi-Method Explainability for Trustworthy Financial AI</title><link>http://arxiv.org/abs/2605.11687v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11687v1</guid><description>Georgios Makridis et al. — arxiv:2605.11687 — Sentiment Analysis</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Executable Agentic Memory for GUI Agent</title><link>http://arxiv.org/abs/2605.12294v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12294v1</guid><description>Zerui Qin et al. — arxiv:2605.12294 — Knowledge Graph</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Graph-Grounded Optimization: Rao-Family Metaheuristics, Classical OR, and SLM-Driven Formulation over Knowledge Graphs</title><link>http://arxiv.org/abs/2605.12204v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.12204v2</guid><description>Madhulatha Mandarapu et al. — arxiv:2605.12204 — Knowledge Graph</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>BadSKP: Backdoor Attacks on Knowledge Graph-Enhanced LLMs with Soft Prompts</title><link>http://arxiv.org/abs/2605.11996v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11996v1</guid><description>Xiaoting Lyu et al. — arxiv:2605.11996 — Knowledge Graph</description><pubDate>Tue, 12 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Qwen Goes Brrr: Off-the-Shelf RAG for Ukrainian Multi-Domain Document Understanding</title><link>http://arxiv.org/abs/2605.10296v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10296v1</guid><description>Anton Bazdyrev et al. — arxiv:2605.10296 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Building Korean linguistic resource for NLU data generation of banking app CS dialog system</title><link>http://arxiv.org/abs/2605.10241v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10241v1</guid><description>Jeongwoo Yoon et al. — arxiv:2605.10241 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>GLiNER-Relex: A Unified Framework for Joint Named Entity Recognition and Relation Extraction</title><link>http://arxiv.org/abs/2605.10108v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10108v1</guid><description>Ihor Stepanov et al. — arxiv:2605.10108 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>H-MAPS: Hierarchical Memory-Augmented Proactive Search Assistant for Scientific Literature</title><link>http://arxiv.org/abs/2605.10097v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10097v1</guid><description>Koji Nishikawa et al. — arxiv:2605.10097 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Beyond Majority Voting: Agreement-Based Clustering to Model Annotator Perspectives in Subjective NLP Tasks</title><link>http://arxiv.org/abs/2605.09955v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09955v1</guid><description>Tadesse Destaw Belay et al. — arxiv:2605.09955 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Yield Curve Forecasting using Machine Learning and Econometrics: A Comparative Analysis</title><link>http://arxiv.org/abs/2605.09842v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09842v1</guid><description>Aman Singh et al. — arxiv:2605.09842 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Beyond the Last Layer: Multi-Layer Representation Fusion for Visual Tokenization</title><link>http://arxiv.org/abs/2605.10780v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10780v2</guid><description>Xuanyu Zhu et al. — arxiv:2605.10780 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Why Low-Resource NLP Needs More Than Cross-Lingual Transfer: Lessons Learned from Luxembourgish</title><link>http://arxiv.org/abs/2605.10714v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10714v1</guid><description>Fred Philippy et al. — arxiv:2605.10714 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Measuring Embedding Sensitivity to Authorial Style in French: Comparing Literary Texts with Language Model Rewritings</title><link>http://arxiv.org/abs/2605.10606v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10606v1</guid><description>Benjamin Icard et al. — arxiv:2605.10606 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>ThreatCore: A Benchmark for Explicit and Implicit Threat Detection</title><link>http://arxiv.org/abs/2605.10563v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10563v1</guid><description>Davide Bruni et al. — arxiv:2605.10563 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>ICT-NLP at SemEval-2026 Task 3: Less Is More -- Multilingual Encoder with Joint Training and Adaptive Ensemble for Dimensional Aspect Sentiment Regression</title><link>http://arxiv.org/abs/2605.10560v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10560v1</guid><description>Liyuan Huang et al. — arxiv:2605.10560 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>HEBATRON: A Hebrew-Specialized Open-Weight Mixture-of-Experts Language Model</title><link>http://arxiv.org/abs/2605.11255v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11255v1</guid><description>Noam Kayzer et al. — arxiv:2605.11255 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>The Scaling Law of Evaluation Failure: Why Simple Averaging Collapses Under Data Sparsity and Item Difficulty Gaps, and How Item Response Theory Recovers Ground Truth Across Domains</title><link>http://arxiv.org/abs/2605.11205v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11205v1</guid><description>Jung Min Kang et al. — arxiv:2605.11205 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>ClinicalBench: Stress-Testing Assertion-Aware Retrieval for Cross-Admission Clinical QA on MIMIC-IV</title><link>http://arxiv.org/abs/2605.11143v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11143v1</guid><description>Alex Stinard et al. — arxiv:2605.11143 — NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Extending Confidence-Based Text2Cypher with Grammar and Schema Aware Filtering</title><link>http://arxiv.org/abs/2605.10318v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10318v1</guid><description>Makbule Gulcin Ozsoy et al. — arxiv:2605.10318 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Positive Alignment: Artificial Intelligence for Human Flourishing</title><link>http://arxiv.org/abs/2605.10310v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10310v1</guid><description>Ruben Laukkonen et al. — arxiv:2605.10310 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>AgentRx: A Benchmark Study of LLM Agents for Multimodal Clinical Prediction Tasks</title><link>http://arxiv.org/abs/2605.10286v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10286v1</guid><description>Baraa Al Jorf et al. — arxiv:2605.10286 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>DP-LAC: Lightweight Adaptive Clipping for Differentially Private Federated Fine-tuning of Language Models</title><link>http://arxiv.org/abs/2605.10272v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10272v1</guid><description>Haaris Mehmood et al. — arxiv:2605.10272 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>IndustryBench: Probing the Industrial Knowledge Boundaries of LLMs</title><link>http://arxiv.org/abs/2605.10267v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10267v1</guid><description>Songlin Bai et al. — arxiv:2605.10267 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Knowledge Poisoning Attacks on Medical Multi-Modal Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.10253v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10253v1</guid><description>Peiru Yang et al. — arxiv:2605.10253 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Teaching LLMs to See Graphs: Unifying Text and Structural Reasoning</title><link>http://arxiv.org/abs/2605.10247v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10247v1</guid><description>Dario Vajda et al. — arxiv:2605.10247 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>SciIntegrity-Bench: A Benchmark for Evaluating Academic Integrity in AI Scientist Systems</title><link>http://arxiv.org/abs/2605.10246v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10246v1</guid><description>Zonglin Yang et al. — arxiv:2605.10246 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Route Before Retrieve: Activating Latent Routing Abilities of LLMs for RAG vs. Long-Context Selection</title><link>http://arxiv.org/abs/2605.10235v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10235v1</guid><description>Yiwen Chen et al. — arxiv:2605.10235 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Social Policy of Large Language Models: How GPT, Claude, DeepSeek and Grok Allocate Social Budgets in Spain and Germany</title><link>http://arxiv.org/abs/2605.10234v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10234v1</guid><description>Claudia Benavides Cantos et al. — arxiv:2605.10234 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Evaluating the False Trust engendered by LLM Explanations</title><link>http://arxiv.org/abs/2605.10930v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10930v1</guid><description>Vardhan Palod et al. — arxiv:2605.10930 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Dynamic Skill Lifecycle Management for Agentic Reinforcement Learning</title><link>http://arxiv.org/abs/2605.10923v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10923v1</guid><description>Junhao Shen et al. — arxiv:2605.10923 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>WildClawBench: A Benchmark for Real-World, Long-Horizon Agent Evaluation</title><link>http://arxiv.org/abs/2605.10912v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10912v1</guid><description>Shuangrui Ding et al. — arxiv:2605.10912 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Beyond Red-Teaming: Formal Guarantees of LLM Guardrail Classifiers</title><link>http://arxiv.org/abs/2605.10901v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10901v1</guid><description>Nikita Kezins et al. — arxiv:2605.10901 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>V4FinBench: Benchmarking Tabular Foundation Models, LLMs, and Standard Methods on Corporate Bankruptcy Prediction</title><link>http://arxiv.org/abs/2605.10896v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10896v1</guid><description>Marcin Kostrzewa et al. — arxiv:2605.10896 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>CppPerf: An Automated Pipeline and Dataset for Performance-Improving C++ Commits</title><link>http://arxiv.org/abs/2605.10890v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10890v1</guid><description>Tommy Ho et al. — arxiv:2605.10890 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Count Anything at Any Granularity</title><link>http://arxiv.org/abs/2605.10887v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10887v1</guid><description>Chang Liu et al. — arxiv:2605.10887 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>LoKA: Low-precision Kernel Applications for Recommendation Models At Scale</title><link>http://arxiv.org/abs/2605.10886v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10886v1</guid><description>Liang Luo et al. — arxiv:2605.10886 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>AssayBench: An Assay-Level Virtual Cell Benchmark for LLMs and Agents</title><link>http://arxiv.org/abs/2605.10876v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10876v1</guid><description>Edward De Brouwer et al. — arxiv:2605.10876 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Compute Where it Counts: Self Optimizing Language Models</title><link>http://arxiv.org/abs/2605.10875v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10875v1</guid><description>Yash Akhauri et al. — arxiv:2605.10875 — LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Positive Alignment: Artificial Intelligence for Human Flourishing</title><link>http://arxiv.org/abs/2605.10310v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10310v1</guid><description>Ruben Laukkonen et al. — arxiv:2605.10310 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>AgentRx: A Benchmark Study of LLM Agents for Multimodal Clinical Prediction Tasks</title><link>http://arxiv.org/abs/2605.10286v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10286v1</guid><description>Baraa Al Jorf et al. — arxiv:2605.10286 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>MemReread: Enhancing Agentic Long-Context Reasoning via Memory-Guided Rereading</title><link>http://arxiv.org/abs/2605.10268v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10268v1</guid><description>Baibei Ji et al. — arxiv:2605.10268 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Towards Autonomous Railway Operations: A Semi-Hierarchical Deep Reinforcement Learning Approach to the Vehicle Rescheduling Problem</title><link>http://arxiv.org/abs/2605.10257v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10257v1</guid><description>Alberto Castagna et al. — arxiv:2605.10257 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Beyond Autonomy: A Dynamic Tiered AgentRunner Framework for Governable and Resilient Enterprise AI Execution</title><link>http://arxiv.org/abs/2605.10223v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10223v1</guid><description>Kai Pan et al. — arxiv:2605.10223 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>V-ABS: Action-Observer Driven Beam Search for Dynamic Visual Reasoning</title><link>http://arxiv.org/abs/2605.10172v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10172v1</guid><description>Zhiwei Ning et al. — arxiv:2605.10172 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>When Reviews Disagree: Fine-Grained Contradiction Analysis in Scientific Peer Reviews</title><link>http://arxiv.org/abs/2605.10171v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10171v1</guid><description>Sandeep Kumar et al. — arxiv:2605.10171 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Balancing Efficiency and Fairness in Traffic Light Control through Deep Reinforcement Learning</title><link>http://arxiv.org/abs/2605.10170v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10170v1</guid><description>Matteo Cederle et al. — arxiv:2605.10170 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>NyayaAI: An AI-Powered Legal Assistant Using Multi-Agent Architecture and Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.10155v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10155v1</guid><description>Deepanshu et al. — arxiv:2605.10155 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Is DRL-based MAC Ready for Underwater Acoustic Networks? Exploring Its Practicality in Real Field Experiments</title><link>http://arxiv.org/abs/2605.10144v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10144v1</guid><description>Jiani Guo et al. — arxiv:2605.10144 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Personal Visual Context Learning in Large Multimodal Models</title><link>http://arxiv.org/abs/2605.10936v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10936v1</guid><description>Zihui Xue et al. — arxiv:2605.10936 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Dynamic Skill Lifecycle Management for Agentic Reinforcement Learning</title><link>http://arxiv.org/abs/2605.10923v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10923v1</guid><description>Junhao Shen et al. — arxiv:2605.10923 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Optimal and Scalable MAPF via Multi-Marginal Optimal Transport and Schrödinger Bridges</title><link>http://arxiv.org/abs/2605.10917v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10917v1</guid><description>Usman A. Khan et al. — arxiv:2605.10917 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Shepherd: A Runtime Substrate Empowering Meta-Agents with a Formalized Execution Trace</title><link>http://arxiv.org/abs/2605.10913v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10913v1</guid><description>Simon Yu et al. — arxiv:2605.10913 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>WildClawBench: A Benchmark for Real-World, Long-Horizon Agent Evaluation</title><link>http://arxiv.org/abs/2605.10912v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10912v1</guid><description>Shuangrui Ding et al. — arxiv:2605.10912 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Equivariant Reinforcement Learning for Clifford Quantum Circuit Synthesis</title><link>http://arxiv.org/abs/2605.10910v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10910v1</guid><description>Richie Yeung et al. — arxiv:2605.10910 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Revisiting Policy Gradients for Restricted Policy Classes: Escaping Myopic Local Optima with $k$-step Policy Gradients</title><link>http://arxiv.org/abs/2605.10909v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10909v1</guid><description>Alex DeWeese et al. — arxiv:2605.10909 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Engineering Robustness into Personal Agents with the AI Workflow Store</title><link>http://arxiv.org/abs/2605.10907v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10907v1</guid><description>Roxana Geambasu et al. — arxiv:2605.10907 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>DataMaster: Towards Autonomous Data Engineering for Machine Learning</title><link>http://arxiv.org/abs/2605.10906v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10906v1</guid><description>Yaxin Du et al. — arxiv:2605.10906 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>MDrive: Benchmarking Closed-Loop Cooperative Driving for End-to-End Multi-agent Systems</title><link>http://arxiv.org/abs/2605.10904v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10904v1</guid><description>Marco Coscoy et al. — arxiv:2605.10904 — LLM Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>AgentRx: A Benchmark Study of LLM Agents for Multimodal Clinical Prediction Tasks</title><link>http://arxiv.org/abs/2605.10286v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10286v1</guid><description>Baraa Al Jorf et al. — arxiv:2605.10286 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Towards Autonomous Railway Operations: A Semi-Hierarchical Deep Reinforcement Learning Approach to the Vehicle Rescheduling Problem</title><link>http://arxiv.org/abs/2605.10257v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10257v1</guid><description>Alberto Castagna et al. — arxiv:2605.10257 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Beyond Autonomy: A Dynamic Tiered AgentRunner Framework for Governable and Resilient Enterprise AI Execution</title><link>http://arxiv.org/abs/2605.10223v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10223v1</guid><description>Kai Pan et al. — arxiv:2605.10223 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>V-ABS: Action-Observer Driven Beam Search for Dynamic Visual Reasoning</title><link>http://arxiv.org/abs/2605.10172v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10172v1</guid><description>Zhiwei Ning et al. — arxiv:2605.10172 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>When Reviews Disagree: Fine-Grained Contradiction Analysis in Scientific Peer Reviews</title><link>http://arxiv.org/abs/2605.10171v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10171v1</guid><description>Sandeep Kumar et al. — arxiv:2605.10171 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>NyayaAI: An AI-Powered Legal Assistant Using Multi-Agent Architecture and Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.10155v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10155v1</guid><description>Deepanshu et al. — arxiv:2605.10155 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>SkillRAE: Agent Skill-Based Context Compilation for Retrieval-Augmented Execution</title><link>http://arxiv.org/abs/2605.10114v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10114v1</guid><description>Xiangcheng Meng et al. — arxiv:2605.10114 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>ViSRA: A Video-based Spatial Reasoning Agent for Multi-modal Large Language Models</title><link>http://arxiv.org/abs/2605.10106v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10106v1</guid><description>Tingshu Mou et al. — arxiv:2605.10106 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>RFAmpDesigner: A Self-Evolving Multi-Agent LLM Framework for Automated Radio Frequency Amplifier Design</title><link>http://arxiv.org/abs/2605.10093v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10093v1</guid><description>Hang Lu et al. — arxiv:2605.10093 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Agentic Fuzzing: Opportunities and Challenges</title><link>http://arxiv.org/abs/2605.10074v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10074v1</guid><description>Junyoung Park et al. — arxiv:2605.10074 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Optimal and Scalable MAPF via Multi-Marginal Optimal Transport and Schrödinger Bridges</title><link>http://arxiv.org/abs/2605.10917v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10917v1</guid><description>Usman A. Khan et al. — arxiv:2605.10917 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Revisiting Policy Gradients for Restricted Policy Classes: Escaping Myopic Local Optima with $k$-step Policy Gradients</title><link>http://arxiv.org/abs/2605.10909v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10909v1</guid><description>Alex DeWeese et al. — arxiv:2605.10909 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>MDrive: Benchmarking Closed-Loop Cooperative Driving for End-to-End Multi-agent Systems</title><link>http://arxiv.org/abs/2605.10904v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10904v1</guid><description>Marco Coscoy et al. — arxiv:2605.10904 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>NanoResearch: Co-Evolving Skills, Memory, and Policy for Personalized Research Automation</title><link>http://arxiv.org/abs/2605.10813v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10813v1</guid><description>Jinhang Xu et al. — arxiv:2605.10813 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>LLMs for Secure Hardware Design and Related Problems: Opportunities and Challenges</title><link>http://arxiv.org/abs/2605.10807v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10807v1</guid><description>Johann Knechtel et al. — arxiv:2605.10807 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>LITMUS: Benchmarking Behavioral Jailbreaks of LLM Agents in Real OS Environments</title><link>http://arxiv.org/abs/2605.10779v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10779v1</guid><description>Chiyu Zhang et al. — arxiv:2605.10779 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>MAGS-SLAM: Monocular Multi-Agent Gaussian Splatting SLAM for Geometrically and Photometrically Consistent Reconstruction</title><link>http://arxiv.org/abs/2605.10760v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10760v1</guid><description>Zhihao Cao et al. — arxiv:2605.10760 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Decentralized Contingency MPC based on Safe Sets for Nonlinear Multi-agent Collision Avoidance</title><link>http://arxiv.org/abs/2605.10738v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10738v1</guid><description>Max Studt et al. — arxiv:2605.10738 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Heteroscedastic Diffusion for Multi-Agent Trajectory Modeling</title><link>http://arxiv.org/abs/2605.10717v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10717v1</guid><description>Guillem Capellera et al. — arxiv:2605.10717 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>The Bystander Effect in Multi-Agent Reasoning: Quantifying Cognitive Loafing in Collaborative Interactions</title><link>http://arxiv.org/abs/2605.10698v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10698v1</guid><description>Dahlia Shehata et al. — arxiv:2605.10698 — Multi-Agent</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Qwen Goes Brrr: Off-the-Shelf RAG for Ukrainian Multi-Domain Document Understanding</title><link>http://arxiv.org/abs/2605.10296v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10296v1</guid><description>Anton Bazdyrev et al. — arxiv:2605.10296 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Knowledge Poisoning Attacks on Medical Multi-Modal Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.10253v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10253v1</guid><description>Peiru Yang et al. — arxiv:2605.10253 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Route Before Retrieve: Activating Latent Routing Abilities of LLMs for RAG vs. Long-Context Selection</title><link>http://arxiv.org/abs/2605.10235v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10235v1</guid><description>Yiwen Chen et al. — arxiv:2605.10235 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>ASTRA-QA: A Benchmark for Abstract Question Answering over Documents</title><link>http://arxiv.org/abs/2605.10168v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10168v1</guid><description>Shu Wang et al. — arxiv:2605.10168 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>NyayaAI: An AI-Powered Legal Assistant Using Multi-Agent Architecture and Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.10155v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10155v1</guid><description>Deepanshu et al. — arxiv:2605.10155 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>RFAmpDesigner: A Self-Evolving Multi-Agent LLM Framework for Automated Radio Frequency Amplifier Design</title><link>http://arxiv.org/abs/2605.10093v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10093v1</guid><description>Hang Lu et al. — arxiv:2605.10093 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Merlin: Deterministic Byte-Exact Deduplication for Lossless Context Optimization in Large Language Model Inference</title><link>http://arxiv.org/abs/2605.09990v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09990v1</guid><description>Sietse Schelpe et al. — arxiv:2605.09990 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Federated Language Models Under Bandwidth Budgets: Distillation Rates and Conformal Coverage</title><link>http://arxiv.org/abs/2605.09986v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09986v1</guid><description>Prasanjit Dubey et al. — arxiv:2605.09986 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Grounded Satirical Generation with RAG</title><link>http://arxiv.org/abs/2605.10853v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10853v1</guid><description>Oona Itkonen et al. — arxiv:2605.10853 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>The First Drop of Ink: Nonlinear Impact of Misleading Information in Long-Context Reasoning</title><link>http://arxiv.org/abs/2605.10828v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10828v1</guid><description>Muhan Gao et al. — arxiv:2605.10828 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>PathISE: Learning Informative Path Supervision for Knowledge Graph Question Answering</title><link>http://arxiv.org/abs/2605.10791v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10791v1</guid><description>Shengxiang Gao et al. — arxiv:2605.10791 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>ComplexMCP: Evaluation of LLM Agents in Dynamic, Interdependent, and Large-Scale Tool Sandbox</title><link>http://arxiv.org/abs/2605.10787v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10787v1</guid><description>Yuanyang Li et al. — arxiv:2605.10787 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>PrimeKG-CL: A Continual Graph Learning Benchmark on Evolving Biomedical Knowledge Graphs</title><link>http://arxiv.org/abs/2605.10529v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10529v1</guid><description>Yousef A. Radwan et al. — arxiv:2605.10529 — RAG</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>To Redact, or not to Redact? A Local LLM Approach to Deliberative Process Privilege Classification</title><link>http://arxiv.org/abs/2605.10211v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10211v1</guid><description>Maik Larooij et al. — arxiv:2605.10211 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>LASAR: Latent Adaptive Semantic Aligned Reasoning for Generative Recommendation</title><link>http://arxiv.org/abs/2605.10207v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10207v1</guid><description>Yiwen Chen et al. — arxiv:2605.10207 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Breaking the Reward Barrier: Accelerating Tree-of-Thought Reasoning via Speculative Exploration</title><link>http://arxiv.org/abs/2605.10195v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10195v1</guid><description>Shuzhang Zhong et al. — arxiv:2605.10195 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>TeleResilienceBench: Quantifying Resilience for LLM Reasoning in Telecommunications</title><link>http://arxiv.org/abs/2605.09929v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09929v1</guid><description>Pranshav Gajjar et al. — arxiv:2605.09929 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Separate First, Fuse Later: Mitigating Cross-Modal Interference in Audio-Visual LLMs Reasoning with Modality-Specific Chain-of-Thought</title><link>http://arxiv.org/abs/2605.09906v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09906v1</guid><description>Xuanchen Li et al. — arxiv:2605.09906 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Continuous Latent Contexts Enable Efficient Online Learning in Transformers</title><link>http://arxiv.org/abs/2605.09867v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09867v1</guid><description>Emile Anand et al. — arxiv:2605.09867 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Exploration-Driven Optimization for Test-Time Large Language Model Reasoning</title><link>http://arxiv.org/abs/2605.09853v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09853v1</guid><description>Changhao Li et al. — arxiv:2605.09853 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Evaluating the False Trust engendered by LLM Explanations</title><link>http://arxiv.org/abs/2605.10930v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10930v1</guid><description>Vardhan Palod et al. — arxiv:2605.10930 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Unmasking On-Policy Distillation: Where It Helps, Where It Hurts, and Why</title><link>http://arxiv.org/abs/2605.10889v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10889v1</guid><description>Mohammadreza Armandpour et al. — arxiv:2605.10889 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>The Last Word Often Wins: A Format Confound in Chain-of-Thought Corruption Studies</title><link>http://arxiv.org/abs/2605.10799v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10799v1</guid><description>Gabriel Garcia et al. — arxiv:2605.10799 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Can You Keep a Secret? Involuntary Information Leakage in Language Model Writing</title><link>http://arxiv.org/abs/2605.10794v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10794v1</guid><description>Ari Holtzman et al. — arxiv:2605.10794 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>RadThinking: A Dataset for Longitudinal Clinical Reasoning in Radiology</title><link>http://arxiv.org/abs/2605.10761v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10761v1</guid><description>Wenxuan Li et al. — arxiv:2605.10761 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>C-CoT: Counterfactual Chain-of-Thought with Vision-Language Models for Safe Autonomous Driving</title><link>http://arxiv.org/abs/2605.10744v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10744v1</guid><description>Kefei Tian et al. — arxiv:2605.10744 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>The Bystander Effect in Multi-Agent Reasoning: Quantifying Cognitive Loafing in Collaborative Interactions</title><link>http://arxiv.org/abs/2605.10698v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10698v1</guid><description>Dahlia Shehata et al. — arxiv:2605.10698 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>DeepRefine: Agent-Compiled Knowledge Refinement via Reinforcement Learning</title><link>http://arxiv.org/abs/2605.10488v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10488v1</guid><description>Haoyu Huang et al. — arxiv:2605.10488 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>CoWorld-VLA: Thinking in a Multi-Expert World Model for Autonomous Driving</title><link>http://arxiv.org/abs/2605.10426v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10426v1</guid><description>Minqing Huang et al. — arxiv:2605.10426 — Reasoning</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>V-ABS: Action-Observer Driven Beam Search for Dynamic Visual Reasoning</title><link>http://arxiv.org/abs/2605.10172v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10172v1</guid><description>Zhiwei Ning et al. — arxiv:2605.10172 — Tool Use</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>TimeClaw: A Time-Series AI Agent with Exploratory Execution Learning</title><link>http://arxiv.org/abs/2605.10038v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10038v1</guid><description>Hangchen Liu et al. — arxiv:2605.10038 — Tool Use</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>TRACER: Verifiable Generative Provenance for Multimodal Tool-Using Agents</title><link>http://arxiv.org/abs/2605.09934v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09934v1</guid><description>Bihui Yu et al. — arxiv:2605.09934 — Tool Use</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>FocuSFT: Bilevel Optimization for Dilution-Aware Long-Context Fine-Tuning</title><link>http://arxiv.org/abs/2605.09932v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09932v1</guid><description>Zehua Pei et al. — arxiv:2605.09932 — Tool Use</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>The Association of Transformer-based Sentiment Analysis with Symptom Distress and Deterioration in Routine Psychotherapy Care</title><link>http://arxiv.org/abs/2605.09838v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09838v1</guid><description>Douglas K. Faust et al. — arxiv:2605.09838 — Tool Use</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Rethinking Agentic Search with Pi-Serini: Is Lexical Retrieval Sufficient?</title><link>http://arxiv.org/abs/2605.10848v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10848v1</guid><description>Tz-Huan Hsu et al. — arxiv:2605.10848 — Tool Use</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Towards On-Policy Data Evolution for Visual-Native Multimodal Deep Search Agents</title><link>http://arxiv.org/abs/2605.10832v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10832v1</guid><description>Shijue Huang et al. — arxiv:2605.10832 — Tool Use</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>TrajPrism: A Multi-Task Benchmark for Language-Grounded Urban Trajectory Understanding</title><link>http://arxiv.org/abs/2605.10782v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10782v1</guid><description>Lihuan Li et al. — arxiv:2605.10782 — Tool Use</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>AutoSOUP: Safety-Oriented Unit Proof Generation for Component-level Memory-Safety Verification</title><link>http://arxiv.org/abs/2605.10712v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10712v1</guid><description>Paschal C. Amusuo et al. — arxiv:2605.10712 — Tool Use</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Safe Multi-Agent Behavior Must Be Maintained, Not Merely Asserted: Constraint Drift in LLM-Based Multi-Agent Systems</title><link>http://arxiv.org/abs/2605.10481v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10481v1</guid><description>Tianxiao Li et al. — arxiv:2605.10481 — Tool Use</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Coherency through formalisations of Structured Natural Language, A case study on FRETish</title><link>http://arxiv.org/abs/2605.10462v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10462v1</guid><description>Joost J. Joosten et al. — arxiv:2605.10462 — Tool Use</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>FLARE: Full-Modality Long-Video Audiovisual Retrieval Benchmark with User-Simulated Queries</title><link>http://arxiv.org/abs/2605.10228v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10228v1</guid><description>Qijie You et al. — arxiv:2605.10228 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>SciVQR: A Multidisciplinary Multimodal Benchmark for Advanced Scientific Reasoning Evaluation</title><link>http://arxiv.org/abs/2605.10187v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10187v1</guid><description>Longteng Guo et al. — arxiv:2605.10187 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>V-ABS: Action-Observer Driven Beam Search for Dynamic Visual Reasoning</title><link>http://arxiv.org/abs/2605.10172v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10172v1</guid><description>Zhiwei Ning et al. — arxiv:2605.10172 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>MicroWorld: Empowering Multimodal Large Language Models to Bridge the Microscopic Domain Gap with Multimodal Attribute Graph</title><link>http://arxiv.org/abs/2605.10120v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10120v1</guid><description>Manyu Li et al. — arxiv:2605.10120 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Plan in Sandbox, Navigate in Open Worlds: Learning Physics-Grounded Abstracted Experience for Embodied Navigation</title><link>http://arxiv.org/abs/2605.10118v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10118v1</guid><description>Zhixuan Shen et al. — arxiv:2605.10118 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>ViSRA: A Video-based Spatial Reasoning Agent for Multi-modal Large Language Models</title><link>http://arxiv.org/abs/2605.10106v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10106v1</guid><description>Tingshu Mou et al. — arxiv:2605.10106 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>SocialDirector: Training-Free Social Interaction Control for Multi-Person Video Generation</title><link>http://arxiv.org/abs/2605.10079v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10079v1</guid><description>Liangyang Ouyang et al. — arxiv:2605.10079 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Sketch-based Access Control: A Multimodal Interface for Translating User Preferences into Intent-Aligned Policies</title><link>http://arxiv.org/abs/2605.10012v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10012v1</guid><description>Kyzyl Monteiro et al. — arxiv:2605.10012 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Med-StepBench: A Hierarchical Reasoning Framework for Evaluating Hallucinations in Medical Vision-Language Models</title><link>http://arxiv.org/abs/2605.10002v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10002v1</guid><description>Minh Khoi Nguyen et al. — arxiv:2605.10002 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>ERASE: Eliminating Redundant Visual Tokens via Adaptive Two-Stage Token Pruning</title><link>http://arxiv.org/abs/2605.09982v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09982v1</guid><description>Yuna Lee et al. — arxiv:2605.09982 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>PriorVLA: Prior-Preserving Adaptation for Vision-Language-Action Models</title><link>http://arxiv.org/abs/2605.10925v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10925v1</guid><description>Xinyu Guo et al. — arxiv:2605.10925 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>RoboMemArena: A Comprehensive and Challenging Robotic Memory Benchmark</title><link>http://arxiv.org/abs/2605.10921v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10921v1</guid><description>Huashuo Lei et al. — arxiv:2605.10921 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>WildClawBench: A Benchmark for Real-World, Long-Horizon Agent Evaluation</title><link>http://arxiv.org/abs/2605.10912v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10912v1</guid><description>Shuangrui Ding et al. — arxiv:2605.10912 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Grounded or Guessing? LVLM Confidence Estimation via Blind-Image Contrastive Ranking</title><link>http://arxiv.org/abs/2605.10893v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10893v1</guid><description>Reza Khanmohammadi et al. — arxiv:2605.10893 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Count Anything at Any Granularity</title><link>http://arxiv.org/abs/2605.10887v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10887v1</guid><description>Chang Liu et al. — arxiv:2605.10887 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>CADBench: A Multimodal Benchmark for AI-Assisted CAD Program Generation</title><link>http://arxiv.org/abs/2605.10873v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10873v1</guid><description>Anna C. Doris et al. — arxiv:2605.10873 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>BenchCAD: A Comprehensive, Industry-Standard Benchmark for Programmatic CAD</title><link>http://arxiv.org/abs/2605.10865v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10865v1</guid><description>Haozhe Zhang et al. — arxiv:2605.10865 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Learning More from Less: Exploiting Counterfactuals for Data-Efficient Chart Understanding</title><link>http://arxiv.org/abs/2605.10855v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10855v1</guid><description>Jianzhu Bao et al. — arxiv:2605.10855 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Verification Mirage: Mapping the Reliability Boundary of Self-Verification in Medical VQA</title><link>http://arxiv.org/abs/2605.10850v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10850v1</guid><description>Ruinan Jin et al. — arxiv:2605.10850 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>BabelDOC: Better Layout-Preserving PDF Translation via Intermediate Representation</title><link>http://arxiv.org/abs/2605.10845v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10845v1</guid><description>Qi Yang et al. — arxiv:2605.10845 — Multimodal LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>MemReread: Enhancing Agentic Long-Context Reasoning via Memory-Guided Rereading</title><link>http://arxiv.org/abs/2605.10268v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10268v1</guid><description>Baibei Ji et al. — arxiv:2605.10268 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Efficient Hybrid CNN-GNN Architecture for Monocular Depth Estimation</title><link>http://arxiv.org/abs/2605.10251v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10251v1</guid><description>Ishan Narayan et al. — arxiv:2605.10251 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Route Before Retrieve: Activating Latent Routing Abilities of LLMs for RAG vs. Long-Context Selection</title><link>http://arxiv.org/abs/2605.10235v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10235v1</guid><description>Yiwen Chen et al. — arxiv:2605.10235 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>TRACE: Distilling Where It Matters via Token-Routed Self On-Policy Alignment</title><link>http://arxiv.org/abs/2605.10194v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10194v1</guid><description>Jiaxuan Wang et al. — arxiv:2605.10194 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Cyclotron Line Variability and Accretion Dynamics in Vela X-1</title><link>http://arxiv.org/abs/2605.10103v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10103v1</guid><description>Mohammed Tobrej et al. — arxiv:2605.10103 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Bridging the Cognitive Gap: A Unified Memory Paradigm for 6G Agentic AI-RAN</title><link>http://arxiv.org/abs/2605.10036v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10036v1</guid><description>Xijun Wang et al. — arxiv:2605.10036 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Continual Harness: Online Adaptation for Self-Improving Foundation Agents</title><link>http://arxiv.org/abs/2605.09998v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09998v1</guid><description>Seth Karten et al. — arxiv:2605.09998 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Attention Drift: What Autoregressive Speculative Decoding Models Learn</title><link>http://arxiv.org/abs/2605.09992v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09992v1</guid><description>Doğaç Eldenk et al. — arxiv:2605.09992 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>HiDrive: A Closed-Loop Benchmark for High-Level Autonomous Driving</title><link>http://arxiv.org/abs/2605.09972v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09972v1</guid><description>Zhongyu Xia et al. — arxiv:2605.09972 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>FocuSFT: Bilevel Optimization for Dilution-Aware Long-Context Fine-Tuning</title><link>http://arxiv.org/abs/2605.09932v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09932v1</guid><description>Zehua Pei et al. — arxiv:2605.09932 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Unmasking On-Policy Distillation: Where It Helps, Where It Hurts, and Why</title><link>http://arxiv.org/abs/2605.10889v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10889v1</guid><description>Mohammadreza Armandpour et al. — arxiv:2605.10889 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>The First Drop of Ink: Nonlinear Impact of Misleading Information in Long-Context Reasoning</title><link>http://arxiv.org/abs/2605.10828v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10828v1</guid><description>Muhan Gao et al. — arxiv:2605.10828 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>CLEF: EEG Foundation Model for Learning Clinical Semantics</title><link>http://arxiv.org/abs/2605.10817v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10817v1</guid><description>Peng Cao et al. — arxiv:2605.10817 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Where Does Long-Context Supervision Actually Go? Effective-Context Exposure Balancing</title><link>http://arxiv.org/abs/2605.10544v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10544v1</guid><description>Jinchang Zhu et al. — arxiv:2605.10544 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>OpenSGA: Efficient 3D Scene Graph Alignment in the Open World</title><link>http://arxiv.org/abs/2605.10484v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10484v1</guid><description>Gang Chen et al. — arxiv:2605.10484 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Safe Multi-Agent Behavior Must Be Maintained, Not Merely Asserted: Constraint Drift in LLM-Based Multi-Agent Systems</title><link>http://arxiv.org/abs/2605.10481v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10481v1</guid><description>Tianxiao Li et al. — arxiv:2605.10481 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Self-Attention as a Covariance Readout: A Unified View of In-Context Learning and Repetition</title><link>http://arxiv.org/abs/2605.10466v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10466v1</guid><description>Haoren Xu et al. — arxiv:2605.10466 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Toward an Engineering of Science: Rebalancing Generation and Verification in the Age of AI</title><link>http://arxiv.org/abs/2605.10425v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10425v1</guid><description>Jiaqi W. Ma et al. — arxiv:2605.10425 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Remember to Forget: Gated Adaptive Positional Encoding</title><link>http://arxiv.org/abs/2605.10414v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10414v1</guid><description>Riccardo Ali et al. — arxiv:2605.10414 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Phoenix-VL 1.5 Medium Technical Report</title><link>http://arxiv.org/abs/2605.10391v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10391v1</guid><description>Team Phoenix et al. — arxiv:2605.10391 — Long Context</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Low-Cost GNSS Anti-Jamming Through 2-Bit Phase Shift Beamforming with Machine Learning</title><link>http://arxiv.org/abs/2605.10264v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10264v1</guid><description>Burak Soner et al. — arxiv:2605.10264 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Bulk-Edge Correspondence via Higher Gauge Theory</title><link>http://arxiv.org/abs/2605.10232v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10232v1</guid><description>Hisham Sati et al. — arxiv:2605.10232 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Nano-U: Efficient Terrain Segmentation for Tiny Robot Navigation</title><link>http://arxiv.org/abs/2605.10210v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10210v1</guid><description>Federico Pizzolato et al. — arxiv:2605.10210 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Measurement-Adapted Eigentask Representations for Photon-Limited Optical Readout</title><link>http://arxiv.org/abs/2605.10008v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10008v1</guid><description>Tianyang Chen et al. — arxiv:2605.10008 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Federated Language Models Under Bandwidth Budgets: Distillation Rates and Conformal Coverage</title><link>http://arxiv.org/abs/2605.09986v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09986v1</guid><description>Prasanjit Dubey et al. — arxiv:2605.09986 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Yeti: A compact protein structure tokenizer for reconstruction and multi-modal generation</title><link>http://arxiv.org/abs/2605.09981v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09981v1</guid><description>Nabin Giri et al. — arxiv:2605.09981 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Frequency Adapter with SAM for Generalized Medical Image Segmentation</title><link>http://arxiv.org/abs/2605.09925v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09925v1</guid><description>Phuoc-Nguyen Bui et al. — arxiv:2605.09925 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Concordia: Self-Improving Synthetic Tables for Federated LLMs</title><link>http://arxiv.org/abs/2605.09855v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09855v1</guid><description>Jimin Huang et al. — arxiv:2605.09855 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Cross-Domain Lossy Compression via Constrained Minimum Entropy Coupling</title><link>http://arxiv.org/abs/2605.09833v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09833v1</guid><description>Nam Nguyen et al. — arxiv:2605.09833 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Fashion Florence: Fine-Tuning Florence-2 for Structured Fashion Attribute Extraction</title><link>http://arxiv.org/abs/2605.09827v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09827v1</guid><description>Anushree Berlia et al. — arxiv:2605.09827 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Compute Where it Counts: Self Optimizing Language Models</title><link>http://arxiv.org/abs/2605.10875v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10875v1</guid><description>Yash Akhauri et al. — arxiv:2605.10875 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>ConQuR: Corner Aligned Activation Quantization via Optimized Rotations for LLMs</title><link>http://arxiv.org/abs/2605.10793v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10793v1</guid><description>Chayne Thrash et al. — arxiv:2605.10793 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Towards a Large Language-Vision Question Answering Model for MSTAR Automatic Target Recognition</title><link>http://arxiv.org/abs/2605.10772v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10772v1</guid><description>David F. Ramirez et al. — arxiv:2605.10772 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Dynamic Cross-Modal Prompt Generation for Multimodal Continual Instruction Tuning</title><link>http://arxiv.org/abs/2605.10765v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10765v1</guid><description>Tao Hu et al. — arxiv:2605.10765 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>AdaPaD: Adaptive Parallel Deflation for PEFT with Self-Correcting Rank Discovery</title><link>http://arxiv.org/abs/2605.10741v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10741v1</guid><description>Barbara Su et al. — arxiv:2605.10741 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>A Simplicial Approach to Higher Geometric Quantization</title><link>http://arxiv.org/abs/2605.10695v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10695v1</guid><description>Qian Zhang et al. — arxiv:2605.10695 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Energy-Efficient Implementation of Spiking Recurrent Cells on FPGA</title><link>http://arxiv.org/abs/2605.10679v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10679v1</guid><description>Pascal Harmeling et al. — arxiv:2605.10679 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Compander-Aligned Query Geometry for Quantized Zeroth-Order Optimization</title><link>http://arxiv.org/abs/2605.10673v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10673v1</guid><description>Yao Shu et al. — arxiv:2605.10673 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>bViT: Investigating Single-Block Recurrence in Vision Transformers for Image Recognition</title><link>http://arxiv.org/abs/2605.10661v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10661v1</guid><description>Michal Byra et al. — arxiv:2605.10661 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>BCJR-QAT: A Differentiable Relaxation of Trellis-Coded Weight Quantization</title><link>http://arxiv.org/abs/2605.10655v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10655v1</guid><description>Venugopalan Iyengar et al. — arxiv:2605.10655 — LLM Efficiency</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>G-Zero: Self-Play for Open-Ended Generation from Zero Data</title><link>http://arxiv.org/abs/2605.09959v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09959v1</guid><description>Chengsong Huang et al. — arxiv:2605.09959 — Alignment</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Structure from Strategic Interaction &amp; Uncertainty Risk Sensitive Games for Robust Preference Learning</title><link>http://arxiv.org/abs/2605.09946v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09946v1</guid><description>Max Horwitz et al. — arxiv:2605.09946 — Alignment</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>MASS-DPO: Multi-negative Active Sample Selection for Direct Policy Optimization</title><link>http://arxiv.org/abs/2605.10784v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10784v1</guid><description>Rohan Surana et al. — arxiv:2605.10784 — Alignment</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>AgentGR: Semantic-aware Agentic Group Decision-Making Simulator for Group Recommendation</title><link>http://arxiv.org/abs/2605.10367v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10367v1</guid><description>Yangtao Zhou et al. — arxiv:2605.10367 — Alignment</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Leveraging RAG for Training-Free Alignment of LLMs</title><link>http://arxiv.org/abs/2605.11217v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11217v1</guid><description>John T. Halloran et al. — arxiv:2605.11217 — Alignment</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Spurious Correlation Learning in Preference Optimization: Mechanisms, Consequences, and Mitigation via Tie Training</title><link>http://arxiv.org/abs/2605.11134v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11134v1</guid><description>Christian Moya et al. — arxiv:2605.11134 — Alignment</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Knowledge Poisoning Attacks on Medical Multi-Modal Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.10253v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10253v1</guid><description>Peiru Yang et al. — arxiv:2605.10253 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>The Vote-Left Equilibrium: A Deterministic Coordination Strategy for the Faithful in The Traitors</title><link>http://arxiv.org/abs/2605.10233v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10233v1</guid><description>Vince Knight et al. — arxiv:2605.10233 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>FORGE: Fragment-Oriented Ranking and Generation for Context-Aware Molecular Optimization</title><link>http://arxiv.org/abs/2605.10230v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10230v1</guid><description>Qingchuan Zhang et al. — arxiv:2605.10230 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>SciVQR: A Multidisciplinary Multimodal Benchmark for Advanced Scientific Reasoning Evaluation</title><link>http://arxiv.org/abs/2605.10187v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10187v1</guid><description>Longteng Guo et al. — arxiv:2605.10187 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>ASTRA-QA: A Benchmark for Abstract Question Answering over Documents</title><link>http://arxiv.org/abs/2605.10168v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10168v1</guid><description>Shu Wang et al. — arxiv:2605.10168 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Explanation-Aware Learning for Enhanced Interpretability in Biomedical Imaging</title><link>http://arxiv.org/abs/2605.10054v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10054v1</guid><description>Zubair Faruqui et al. — arxiv:2605.10054 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>EchoPrune: Interpreting Redundancy as Temporal Echoes for Efficient VideoLLMs</title><link>http://arxiv.org/abs/2605.10050v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10050v1</guid><description>Jiameng Li et al. — arxiv:2605.10050 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>TrajDLM: Topology-Aware Block Diffusion Language Model for Trajectory Generation</title><link>http://arxiv.org/abs/2605.10020v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10020v1</guid><description>Wilson Wongso et al. — arxiv:2605.10020 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Med-StepBench: A Hierarchical Reasoning Framework for Evaluating Hallucinations in Medical Vision-Language Models</title><link>http://arxiv.org/abs/2605.10002v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10002v1</guid><description>Minh Khoi Nguyen et al. — arxiv:2605.10002 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Omni-Persona: Systematic Benchmarking and Improving Omnimodal Personalization</title><link>http://arxiv.org/abs/2605.09996v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09996v1</guid><description>Yeongtak Oh et al. — arxiv:2605.09996 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Evaluating the False Trust engendered by LLM Explanations</title><link>http://arxiv.org/abs/2605.10930v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10930v1</guid><description>Vardhan Palod et al. — arxiv:2605.10930 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Pixal3D: Pixel-Aligned 3D Generation from Images</title><link>http://arxiv.org/abs/2605.10922v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10922v1</guid><description>Dong-Yang Li et al. — arxiv:2605.10922 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Grounded or Guessing? LVLM Confidence Estimation via Blind-Image Contrastive Ranking</title><link>http://arxiv.org/abs/2605.10893v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10893v1</guid><description>Reza Khanmohammadi et al. — arxiv:2605.10893 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Neural at ArchEHR-QA 2026: One Method Fits All: Unified Prompt Optimization for Clinical QA over EHRs</title><link>http://arxiv.org/abs/2605.10877v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10877v1</guid><description>Abrar Majeedi et al. — arxiv:2605.10877 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Attractor-Vascular Coupling Theory: Formal Grounding and Empirical Validation for AAMI-Standard Cuffless Blood Pressure Estimation from Smartphone Photoplethysmography</title><link>http://arxiv.org/abs/2605.10871v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10871v1</guid><description>Timothy Oladunni et al. — arxiv:2605.10871 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>BenchCAD: A Comprehensive, Industry-Standard Benchmark for Programmatic CAD</title><link>http://arxiv.org/abs/2605.10865v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10865v1</guid><description>Haozhe Zhang et al. — arxiv:2605.10865 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>BabelDOC: Better Layout-Preserving PDF Translation via Intermediate Representation</title><link>http://arxiv.org/abs/2605.10845v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10845v1</guid><description>Qi Yang et al. — arxiv:2605.10845 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Probing Cross-modal Information Hubs in Audio-Visual LLMs</title><link>http://arxiv.org/abs/2605.10815v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10815v1</guid><description>Jihoo Jung et al. — arxiv:2605.10815 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>New AI-Driven Tools for Enhancing Campus Well-being: A Prevention and Intervention Approach</title><link>http://arxiv.org/abs/2605.10804v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10804v1</guid><description>Jinwen Tang et al. — arxiv:2605.10804 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>The Last Word Often Wins: A Format Confound in Chain-of-Thought Corruption Studies</title><link>http://arxiv.org/abs/2605.10799v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10799v1</guid><description>Gabriel Garcia et al. — arxiv:2605.10799 — Hallucination</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Metis: Learning to Jailbreak LLMs via Self-Evolving Metacognitive Policy Optimization</title><link>http://arxiv.org/abs/2605.10067v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10067v1</guid><description>Huilin Zhou et al. — arxiv:2605.10067 — LLM Safety</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Adversarial Attacks Against MLLMs via Progressive Resolution Processing and Adaptive Feature Alignment</title><link>http://arxiv.org/abs/2605.09902v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09902v1</guid><description>Haobo Wang et al. — arxiv:2605.09902 — LLM Safety</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Beyond Red-Teaming: Formal Guarantees of LLM Guardrail Classifiers</title><link>http://arxiv.org/abs/2605.10901v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10901v1</guid><description>Nikita Kezins et al. — arxiv:2605.10901 — LLM Safety</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>RUBEN: Rule-Based Explanations for Retrieval-Augmented LLM Systems</title><link>http://arxiv.org/abs/2605.10862v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10862v1</guid><description>Joel Rorseth et al. — arxiv:2605.10862 — LLM Safety</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>LLMs for Secure Hardware Design and Related Problems: Opportunities and Challenges</title><link>http://arxiv.org/abs/2605.10807v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10807v1</guid><description>Johann Knechtel et al. — arxiv:2605.10807 — LLM Safety</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>LITMUS: Benchmarking Behavioral Jailbreaks of LLM Agents in Real OS Environments</title><link>http://arxiv.org/abs/2605.10779v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10779v1</guid><description>Chiyu Zhang et al. — arxiv:2605.10779 — LLM Safety</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Break the Brake, Not the Wheel: Untargeted Jailbreak via Entropy Maximization</title><link>http://arxiv.org/abs/2605.10764v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10764v1</guid><description>Mengqi He et al. — arxiv:2605.10764 — LLM Safety</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Re-Triggering Safeguards within LLMs for Jailbreak Detection</title><link>http://arxiv.org/abs/2605.10611v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10611v1</guid><description>Zheng Lin et al. — arxiv:2605.10611 — LLM Safety</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Guaranteed Jailbreaking Defense via Disrupt-and-Rectify Smoothing</title><link>http://arxiv.org/abs/2605.10582v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10582v1</guid><description>Zheng Lin et al. — arxiv:2605.10582 — LLM Safety</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>TourMart: A Parametric Audit Instrument for Commission Steering in LLM Travel Agents</title><link>http://arxiv.org/abs/2605.10440v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10440v1</guid><description>Yao Liu et al. — arxiv:2605.10440 — LLM Safety</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Adversarial SQL Injection Generation with LLM-Based Architectures</title><link>http://arxiv.org/abs/2605.11188v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11188v1</guid><description>Ali Karakoc et al. — arxiv:2605.11188 — LLM Safety</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>IndustryBench: Probing the Industrial Knowledge Boundaries of LLMs</title><link>http://arxiv.org/abs/2605.10267v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10267v1</guid><description>Songlin Bai et al. — arxiv:2605.10267 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Unsupervised Process Reward Models</title><link>http://arxiv.org/abs/2605.10158v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10158v1</guid><description>Artyom Gadetsky et al. — arxiv:2605.10158 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>FormalRewardBench: A Benchmark for Formal Theorem Proving Reward Models</title><link>http://arxiv.org/abs/2605.10141v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10141v1</guid><description>Zeynel A. Uluşan et al. — arxiv:2605.10141 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>G-Zero: Self-Play for Open-Ended Generation from Zero Data</title><link>http://arxiv.org/abs/2605.09959v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09959v1</guid><description>Chengsong Huang et al. — arxiv:2605.09959 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Team-Based Self-Play With Dual Adaptive Weighting for Fine-Tuning LLMs</title><link>http://arxiv.org/abs/2605.09922v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09922v1</guid><description>Wu Li et al. — arxiv:2605.09922 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Nautilus Compass: Black-box Persona Drift Detection for Production LLM Agents</title><link>http://arxiv.org/abs/2605.09863v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09863v1</guid><description>Chunxiao Wang et al. — arxiv:2605.09863 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Evaluating the False Trust engendered by LLM Explanations</title><link>http://arxiv.org/abs/2605.10930v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10930v1</guid><description>Vardhan Palod et al. — arxiv:2605.10930 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>WildClawBench: A Benchmark for Real-World, Long-Horizon Agent Evaluation</title><link>http://arxiv.org/abs/2605.10912v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10912v1</guid><description>Shuangrui Ding et al. — arxiv:2605.10912 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Grounded Satirical Generation with RAG</title><link>http://arxiv.org/abs/2605.10853v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10853v1</guid><description>Oona Itkonen et al. — arxiv:2605.10853 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>BabelDOC: Better Layout-Preserving PDF Translation via Intermediate Representation</title><link>http://arxiv.org/abs/2605.10845v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10845v1</guid><description>Qi Yang et al. — arxiv:2605.10845 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Reasoning Is Not Free: Robust Adaptive Cost-Efficient Routing for LLM-as-a-Judge</title><link>http://arxiv.org/abs/2605.10805v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10805v1</guid><description>Wenbo Zhang et al. — arxiv:2605.10805 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>LITMUS: Benchmarking Behavioral Jailbreaks of LLM Agents in Real OS Environments</title><link>http://arxiv.org/abs/2605.10779v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10779v1</guid><description>Chiyu Zhang et al. — arxiv:2605.10779 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Navigating the Sea of LLM Evaluation: Investigating Bias in Toxicity Benchmarks</title><link>http://arxiv.org/abs/2605.10639v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10639v1</guid><description>Regina Gugg et al. — arxiv:2605.10639 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>PRISM: Generation-Time Detection and Mitigation of Secret Leakage in Multi-Agent LLM Pipelines</title><link>http://arxiv.org/abs/2605.10614v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10614v1</guid><description>Riya Tapwal et al. — arxiv:2605.10614 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>LLARS: Enabling Domain Expert &amp; Developer Collaboration for LLM Prompting, Generation and Evaluation</title><link>http://arxiv.org/abs/2605.10593v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10593v1</guid><description>Philipp Steigerwald et al. — arxiv:2605.10593 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Valid Best-Model Identification for LLM Evaluation via Low-Rank Factorization</title><link>http://arxiv.org/abs/2605.10405v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10405v1</guid><description>Elad Tolochinsky et al. — arxiv:2605.10405 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>VERDI: Single-Call Confidence Estimation for Verification-Based LLM Judges via Decomposed Inference</title><link>http://arxiv.org/abs/2605.11334v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11334v1</guid><description>Jasmine Qi et al. — arxiv:2605.11334 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Rethinking Evaluation for LLM Hallucination Detection: A Desiderata, A New RAG-based Benchmark, New Insights</title><link>http://arxiv.org/abs/2605.11330v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11330v1</guid><description>Wenbo Chen et al. — arxiv:2605.11330 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Rethinking LLMOps for Fraud and AML: Building a Compliance-Grade LLM Serving Stack</title><link>http://arxiv.org/abs/2605.11232v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11232v1</guid><description>Prathamesh Vasudeo Naik et al. — arxiv:2605.11232 — LLM Evaluation</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Usability as a Weapon: Attacking the Safety of LLM-Based Code Generation via Usability Requirements</title><link>http://arxiv.org/abs/2605.10133v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10133v1</guid><description>Yue Li et al. — arxiv:2605.10133 — Code LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Prospective Compression in Human Abstraction Learning</title><link>http://arxiv.org/abs/2605.09985v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09985v1</guid><description>Leonardo Hernandez Cano et al. — arxiv:2605.09985 — Code LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>RADAR: Redundancy-Aware Diffusion for Multi-Agent Communication Structure Generation</title><link>http://arxiv.org/abs/2605.09907v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09907v1</guid><description>Zhen Zhang et al. — arxiv:2605.09907 — Code LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>CADBench: A Multimodal Benchmark for AI-Assisted CAD Program Generation</title><link>http://arxiv.org/abs/2605.10873v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10873v1</guid><description>Anna C. Doris et al. — arxiv:2605.10873 — Code LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>BenchCAD: A Comprehensive, Industry-Standard Benchmark for Programmatic CAD</title><link>http://arxiv.org/abs/2605.10865v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10865v2</guid><description>Haozhe Zhang et al. — arxiv:2605.10865 — Code LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>The Agent Use of Agent Beings: Agent Cybernetics Is the Missing Science of Foundation Agents</title><link>http://arxiv.org/abs/2605.10754v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10754v1</guid><description>Xinrun Wang et al. — arxiv:2605.10754 — Code LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>AutoSOUP: Safety-Oriented Unit Proof Generation for Component-level Memory-Safety Verification</title><link>http://arxiv.org/abs/2605.10712v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10712v1</guid><description>Paschal C. Amusuo et al. — arxiv:2605.10712 — Code LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Correct-by-Construction G-Code Generation: A Neuro-Symbolic Approach via Separation Logic</title><link>http://arxiv.org/abs/2605.10568v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10568v1</guid><description>Yeonseok Lee et al. — arxiv:2605.10568 — Code LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Vision2Code: A Multi-Domain Benchmark for Evaluating Image-to-Code Generation</title><link>http://arxiv.org/abs/2605.11307v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11307v1</guid><description>Ajay Vikram Periasami et al. — arxiv:2605.11307 — Code LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Primal Generation, Dual Judgment: Self-Training from Test-Time Scaling</title><link>http://arxiv.org/abs/2605.11299v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11299v1</guid><description>Yizhu Jiao et al. — arxiv:2605.11299 — Code LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Internalizing Curriculum Judgment for LLM Reinforcement Fine-Tuning</title><link>http://arxiv.org/abs/2605.11235v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11235v1</guid><description>Han Zheng et al. — arxiv:2605.11235 — Code LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>AffectCodec: Emotion-Preserving Neural Speech Codec for Expressive Speech Modeling</title><link>http://arxiv.org/abs/2605.11098v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11098v1</guid><description>Jiacheng Shi et al. — arxiv:2605.11098 — Speech LLM</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Speech LLM</category></item><item><title>LegalCiteBench: Evaluating Citation Reliability in Legal Language Models</title><link>http://arxiv.org/abs/2605.10186v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10186v1</guid><description>Sijia Chen et al. — arxiv:2605.10186 — Legal NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Legal NLP</category></item><item><title>NyayaAI: An AI-Powered Legal Assistant Using Multi-Agent Architecture and Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.10155v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10155v1</guid><description>Deepanshu et al. — arxiv:2605.10155 — Legal NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Legal NLP</category></item><item><title>BabelDOC: Better Layout-Preserving PDF Translation via Intermediate Representation</title><link>http://arxiv.org/abs/2605.10845v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10845v1</guid><description>Qi Yang et al. — arxiv:2605.10845 — Multilingual NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Why Low-Resource NLP Needs More Than Cross-Lingual Transfer: Lessons Learned from Luxembourgish</title><link>http://arxiv.org/abs/2605.10714v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10714v1</guid><description>Fred Philippy et al. — arxiv:2605.10714 — Multilingual NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>ICT-NLP at SemEval-2026 Task 3: Less Is More -- Multilingual Encoder with Joint Training and Adaptive Ensemble for Dimensional Aspect Sentiment Regression</title><link>http://arxiv.org/abs/2605.10560v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10560v1</guid><description>Liyuan Huang et al. — arxiv:2605.10560 — Multilingual NLP</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>GLiNER-Relex: A Unified Framework for Joint Named Entity Recognition and Relation Extraction</title><link>http://arxiv.org/abs/2605.10108v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10108v1</guid><description>Ihor Stepanov et al. — arxiv:2605.10108 — Named Entity Recognition</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Named Entity Recognition</category></item><item><title>Interpretable Coreference Resolution Evaluation Using Explicit Semantics</title><link>http://arxiv.org/abs/2605.10627v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10627v1</guid><description>Bruno Gatti et al. — arxiv:2605.10627 — Named Entity Recognition</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Named Entity Recognition</category></item><item><title>Reconstructing rare particle source by femtoscopic correlations</title><link>http://arxiv.org/abs/2605.10167v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10167v1</guid><description>Liang Zhang et al. — arxiv:2605.10167 — Information Extraction</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>OUIDecay: Adaptive Layer-wise Weight Decay for CNNs Using Online Activation Patterns</title><link>http://arxiv.org/abs/2605.10161v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10161v1</guid><description>Alberto Fernández-Hernández et al. — arxiv:2605.10161 — Information Extraction</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>Useful for Exploration, Risky for Precision: Evaluating AI Tools in Academic Research</title><link>http://arxiv.org/abs/2605.10125v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10125v2</guid><description>Anthea Dathe et al. — arxiv:2605.10125 — Information Extraction</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>PlantMarkerBench: A Multi-Species Benchmark for Evidence-Grounded Plant Marker Reasoning</title><link>http://arxiv.org/abs/2605.10032v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10032v2</guid><description>Sajib Acharjee Dip et al. — arxiv:2605.10032 — Information Extraction</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>GLiNER2-PII: A Multilingual Model for Personally Identifiable Information Extraction</title><link>http://arxiv.org/abs/2605.09973v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09973v1</guid><description>Urchade Zaratiana et al. — arxiv:2605.09973 — Information Extraction</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>Information Extraction of Nested Complex Structure of Quantum Cascade Lasers via Large Language Models</title><link>http://arxiv.org/abs/2605.09927v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09927v1</guid><description>Xiao Fang et al. — arxiv:2605.09927 — Information Extraction</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>VERDI: Single-Call Confidence Estimation for Verification-Based LLM Judges via Decomposed Inference</title><link>http://arxiv.org/abs/2605.11334v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11334v1</guid><description>Jasmine Qi et al. — arxiv:2605.11334 — Text Classification</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>HEPA: A Self-Supervised Horizon-Conditioned Event Predictive Architecture for Time Series</title><link>http://arxiv.org/abs/2605.11130v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11130v2</guid><description>Jonas Petersen et al. — arxiv:2605.11130 — Text Classification</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>IndustryBench: Probing the Industrial Knowledge Boundaries of LLMs</title><link>http://arxiv.org/abs/2605.10267v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10267v1</guid><description>Songlin Bai et al. — arxiv:2605.10267 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>How Should LLMs Listen While Speaking? A Study of User-Stream Routing in Full-Duplex Spoken Dialogue</title><link>http://arxiv.org/abs/2605.10199v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10199v1</guid><description>Hui Lu et al. — arxiv:2605.10199 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>LegalCiteBench: Evaluating Citation Reliability in Legal Language Models</title><link>http://arxiv.org/abs/2605.10186v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10186v1</guid><description>Sijia Chen et al. — arxiv:2605.10186 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>ASTRA-QA: A Benchmark for Abstract Question Answering over Documents</title><link>http://arxiv.org/abs/2605.10168v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10168v1</guid><description>Shu Wang et al. — arxiv:2605.10168 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Useful for Exploration, Risky for Precision: Evaluating AI Tools in Academic Research</title><link>http://arxiv.org/abs/2605.10125v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10125v1</guid><description>Anthea Dathe et al. — arxiv:2605.10125 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>MAGE: Multi-Agent Self-Evolution with Co-Evolutionary Knowledge Graphs</title><link>http://arxiv.org/abs/2605.10064v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10064v1</guid><description>Ruiyi Yang et al. — arxiv:2605.10064 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Separate First, Fuse Later: Mitigating Cross-Modal Interference in Audio-Visual LLMs Reasoning with Modality-Specific Chain-of-Thought</title><link>http://arxiv.org/abs/2605.09906v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09906v1</guid><description>Xuanchen Li et al. — arxiv:2605.09906 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>TOC-Bench: A Temporal Object Consistency Benchmark for Video Large Language Models</title><link>http://arxiv.org/abs/2605.09904v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09904v1</guid><description>Junzhe Chen et al. — arxiv:2605.09904 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Grounded or Guessing? LVLM Confidence Estimation via Blind-Image Contrastive Ranking</title><link>http://arxiv.org/abs/2605.10893v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10893v1</guid><description>Reza Khanmohammadi et al. — arxiv:2605.10893 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Neural at ArchEHR-QA 2026: One Method Fits All: Unified Prompt Optimization for Clinical QA over EHRs</title><link>http://arxiv.org/abs/2605.10877v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10877v1</guid><description>Abrar Majeedi et al. — arxiv:2605.10877 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>BenchCAD: A Comprehensive, Industry-Standard Benchmark for Programmatic CAD</title><link>http://arxiv.org/abs/2605.10865v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10865v1</guid><description>Haozhe Zhang et al. — arxiv:2605.10865 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>DGPO: Beyond Pairwise Preferences with Directional Consistent Groupwise Optimization</title><link>http://arxiv.org/abs/2605.10863v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10863v1</guid><description>Mengyi Deng et al. — arxiv:2605.10863 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Verification Mirage: Mapping the Reliability Boundary of Self-Verification in Medical VQA</title><link>http://arxiv.org/abs/2605.10850v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10850v1</guid><description>Ruinan Jin et al. — arxiv:2605.10850 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>PathISE: Learning Informative Path Supervision for Knowledge Graph Question Answering</title><link>http://arxiv.org/abs/2605.10791v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10791v1</guid><description>Shengxiang Gao et al. — arxiv:2605.10791 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>MASS-DPO: Multi-negative Active Sample Selection for Direct Policy Optimization</title><link>http://arxiv.org/abs/2605.10784v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10784v1</guid><description>Rohan Surana et al. — arxiv:2605.10784 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Towards a Large Language-Vision Question Answering Model for MSTAR Automatic Target Recognition</title><link>http://arxiv.org/abs/2605.10772v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10772v1</guid><description>David F. Ramirez et al. — arxiv:2605.10772 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>GridProbe: Posterior-Probing for Adaptive Test-Time Compute in Long-Video VLMs</title><link>http://arxiv.org/abs/2605.10762v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10762v1</guid><description>Mohamed Eltahir et al. — arxiv:2605.10762 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>RadThinking: A Dataset for Longitudinal Clinical Reasoning in Radiology</title><link>http://arxiv.org/abs/2605.10761v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10761v1</guid><description>Wenxuan Li et al. — arxiv:2605.10761 — Question Answering</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>DECO-MWE: building a linguistic resource of Korean multiword expressions for feature-based sentiment analysis</title><link>http://arxiv.org/abs/2605.10295v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10295v1</guid><description>Jaeho Han et al. — arxiv:2605.10295 — Sentiment Analysis</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Beyond Majority Voting: Agreement-Based Clustering to Model Annotator Perspectives in Subjective NLP Tasks</title><link>http://arxiv.org/abs/2605.09955v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09955v1</guid><description>Tadesse Destaw Belay et al. — arxiv:2605.09955 — Sentiment Analysis</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>The Association of Transformer-based Sentiment Analysis with Symptom Distress and Deterioration in Routine Psychotherapy Care</title><link>http://arxiv.org/abs/2605.09838v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09838v1</guid><description>Douglas K. Faust et al. — arxiv:2605.09838 — Sentiment Analysis</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Relations Are Channels: Knowledge Graph Embedding via Kraus Decompositions</title><link>http://arxiv.org/abs/2605.10317v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10317v1</guid><description>Sayan Kumar Chaki et al. — arxiv:2605.10317 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>MicroWorld: Empowering Multimodal Large Language Models to Bridge the Microscopic Domain Gap with Multimodal Attribute Graph</title><link>http://arxiv.org/abs/2605.10120v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10120v1</guid><description>Manyu Li et al. — arxiv:2605.10120 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>GLiNER-Relex: A Unified Framework for Joint Named Entity Recognition and Relation Extraction</title><link>http://arxiv.org/abs/2605.10108v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10108v1</guid><description>Ihor Stepanov et al. — arxiv:2605.10108 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>MAGE: Multi-Agent Self-Evolution with Co-Evolutionary Knowledge Graphs</title><link>http://arxiv.org/abs/2605.10064v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10064v1</guid><description>Ruiyi Yang et al. — arxiv:2605.10064 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>GraphInstruct: A Progressive Benchmark for Diagnosing Capability Gaps in LLM Graph Generation</title><link>http://arxiv.org/abs/2605.09997v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09997v1</guid><description>Zihe Wei et al. — arxiv:2605.09997 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>PathISE: Learning Informative Path Supervision for Knowledge Graph Question Answering</title><link>http://arxiv.org/abs/2605.10791v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10791v1</guid><description>Shengxiang Gao et al. — arxiv:2605.10791 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Hierarchical Causal Abduction: A Foundation Framework for Explainable Model Predictive Control</title><link>http://arxiv.org/abs/2605.10624v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10624v1</guid><description>Ramesh Arvind Naagarajan et al. — arxiv:2605.10624 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Keeping track of errors: A study of SHACL-DS for RDF dataset validation on the ERA RINF Knowledge Graph</title><link>http://arxiv.org/abs/2605.10540v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10540v1</guid><description>Davan Chiem Dao et al. — arxiv:2605.10540 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>A Reflective Storytelling Agent for Older Adults: Integrating Argumentation Schemes and Argument Mining in LLM-Based Personalised Narratives</title><link>http://arxiv.org/abs/2605.10531v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10531v1</guid><description>Jayalakshmi Baskar et al. — arxiv:2605.10531 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>PrimeKG-CL: A Continual Graph Learning Benchmark on Evolving Biomedical Knowledge Graphs</title><link>http://arxiv.org/abs/2605.10529v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10529v1</guid><description>Yousef A. Radwan et al. — arxiv:2605.10529 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>CMKL: Modality-Aware Continual Learning for Evolving Biomedical Knowledge Graphs</title><link>http://arxiv.org/abs/2605.10510v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.10510v1</guid><description>Yousef A. Radwan et al. — arxiv:2605.10510 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Much of Geospatial Web Search Is Beyond Traditional GIS</title><link>http://arxiv.org/abs/2605.11336v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11336v1</guid><description>Ilya Ilyankou et al. — arxiv:2605.11336 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>CORE: Cyclic Orthotope Relation Embedding for Knowledge Graph Completion</title><link>http://arxiv.org/abs/2605.11159v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11159v1</guid><description>Yingqi Zeng et al. — arxiv:2605.11159 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>ClinicalBench: Stress-Testing Assertion-Aware Retrieval for Cross-Admission Clinical QA on MIMIC-IV</title><link>http://arxiv.org/abs/2605.11143v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11143v1</guid><description>Alex Stinard et al. — arxiv:2605.11143 — Knowledge Graph</description><pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>WISTERIA: Learning Clinical Representations from Noisy Supervision via Multi-View Consistency in Electronic Health Records</title><link>http://arxiv.org/abs/2605.09765v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09765v1</guid><description>Ruan Dong et al. — arxiv:2605.09765 — NLP</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Mitigating Multimodal Inconsistency via Cognitive Dual-Pathway Reasoning for Intent Recognition</title><link>http://arxiv.org/abs/2605.09468v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09468v1</guid><description>Yifan Wang et al. — arxiv:2605.09468 — NLP</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>HOME-KGQA: A Benchmark Dataset for Multimodal Knowledge Graph Question Answering on Household Daily Activities</title><link>http://arxiv.org/abs/2605.09348v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09348v1</guid><description>Shusaku Egami et al. — arxiv:2605.09348 — NLP</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>MedMeta: A Benchmark for LLMs in Synthesizing Meta-Analysis Conclusion from Medical Studies</title><link>http://arxiv.org/abs/2605.09661v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09661v1</guid><description>Huy Hoang Ha et al. — arxiv:2605.09661 — RAG</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Byte-Exact Deduplication in Retrieval-Augmented Generation: A Three-Regime Empirical Analysis Across Public Benchmarks</title><link>http://arxiv.org/abs/2605.09611v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09611v1</guid><description>Sietse Schelpe et al. — arxiv:2605.09611 — RAG</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>LEAD: Length-Efficient Adaptive and Dynamic Reasoning for Large Language Models</title><link>http://arxiv.org/abs/2605.09806v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09806v1</guid><description>Songtao Wei et al. — arxiv:2605.09806 — Reasoning</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Distilling 3D Spatial Reasoning into a Lightweight Vision-Language Model with CoT</title><link>http://arxiv.org/abs/2605.09719v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09719v1</guid><description>Alaa Asfour et al. — arxiv:2605.09719 — Reasoning</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Do multimodal models imagine electric sheep?</title><link>http://arxiv.org/abs/2605.09693v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09693v1</guid><description>Santhosh Kumar Ramakrishnan et al. — arxiv:2605.09693 — Reasoning</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Oracle Poisoning: Corrupting Knowledge Graphs to Weaponise AI Agent Reasoning</title><link>http://arxiv.org/abs/2605.09822v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09822v1</guid><description>Ben Kereopa-Yorke et al. — arxiv:2605.09822 — Tool Use</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Evaluating Tool Cloning in Agentic-AI Ecosystems</title><link>http://arxiv.org/abs/2605.09817v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09817v1</guid><description>Taein Kim et al. — arxiv:2605.09817 — Tool Use</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Trajectory Supervision for Continual Tool-Use Learning in LLMs</title><link>http://arxiv.org/abs/2605.09734v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09734v1</guid><description>Vishnu Vardhan Reddy et al. — arxiv:2605.09734 — Tool Use</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>RubricRefine: Improving Tool-Use Agent Reliability with Training-Free Pre-Execution Refinement</title><link>http://arxiv.org/abs/2605.09730v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09730v1</guid><description>Will LeVine et al. — arxiv:2605.09730 — Tool Use</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>MonitoringBench: Semi-Automated Red-Teaming for Agent Monitoring</title><link>http://arxiv.org/abs/2605.09684v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09684v1</guid><description>Monika Jotautaitė et al. — arxiv:2605.09684 — Tool Use</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Learning to Compress Time-to-Control: A Reinforcement Learning Framework for Chronic Disease Management</title><link>http://arxiv.org/abs/2605.09818v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09818v1</guid><description>Prabhjot Singh et al. — arxiv:2605.09818 — Alignment</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>EvoPref: Multi-Objective Evolutionary Optimization Discovers Diverse LLM Alignments Beyond Gradient Descent</title><link>http://arxiv.org/abs/2605.09777v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09777v1</guid><description>Dongxin Guo et al. — arxiv:2605.09777 — Alignment</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Offline Preference Optimization for Rectified Flow with Noise-Tracked Pairs</title><link>http://arxiv.org/abs/2605.09433v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09433v1</guid><description>Yunhong Lu et al. — arxiv:2605.09433 — Alignment</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Near-Optimal Last-Iterate Convergence for Zero-Sum Games with Bandit Feedback and Opponent Actions</title><link>http://arxiv.org/abs/2605.09363v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09363v1</guid><description>Soumita Hait et al. — arxiv:2605.09363 — Alignment</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>CALYREX: Cross-Attention LaYeR EXtended Transformers for System Prompt Anchoring</title><link>http://arxiv.org/abs/2605.09737v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09737v1</guid><description>Li Lixing et al. — arxiv:2605.09737 — LLM Safety</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>MonitoringBench: Semi-Automated Red-Teaming for Agent Monitoring</title><link>http://arxiv.org/abs/2605.09684v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09684v1</guid><description>Monika Jotautaitė et al. — arxiv:2605.09684 — LLM Safety</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Modeling Implicit Conflict Monitoring Mechanisms against Stereotypes in LLMs</title><link>http://arxiv.org/abs/2605.09647v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09647v1</guid><description>Jingshen Zhang et al. — arxiv:2605.09647 — LLM Safety</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>&quot;Training robust watermarking model may hurt authentication!&apos;&apos; Exploring and Mitigating the Identity Leakage in Robust Watermarking</title><link>http://arxiv.org/abs/2605.09646v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09646v1</guid><description>Xinyu Zhang et al. — arxiv:2605.09646 — LLM Safety</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Position: AI Security Policy Should Target Systems, Not Models</title><link>http://arxiv.org/abs/2605.09504v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09504v1</guid><description>Michael A. Riegler et al. — arxiv:2605.09504 — LLM Safety</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>NEXUS: Continual Learning of Symbolic Constraints for Safe and Robust Embodied Planning</title><link>http://arxiv.org/abs/2605.09387v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09387v1</guid><description>Tiehan Cui et al. — arxiv:2605.09387 — LLM Safety</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Calibrate, Don&apos;t Curate: Label-Efficient Estimation from Noisy LLM Judges</title><link>http://arxiv.org/abs/2605.09702v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09702v1</guid><description>Yanran Li et al. — arxiv:2605.09702 — LLM Evaluation</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>MedMeta: A Benchmark for LLMs in Synthesizing Meta-Analysis Conclusion from Medical Studies</title><link>http://arxiv.org/abs/2605.09661v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09661v1</guid><description>Huy Hoang Ha et al. — arxiv:2605.09661 — LLM Evaluation</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>CLR-voyance: Reinforcing Open-Ended Reasoning for Inpatient Clinical Decision Support with Outcome-Aware Rubrics</title><link>http://arxiv.org/abs/2605.09584v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09584v1</guid><description>Aishik Nagar et al. — arxiv:2605.09584 — LLM Evaluation</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>fmxcoders: Factorized Masked Crosscoders for Cross-Layer Feature Discovery</title><link>http://arxiv.org/abs/2605.09438v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09438v1</guid><description>Andreas D. Demou et al. — arxiv:2605.09438 — LLM Evaluation</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Entropy-informed Decoding: Adaptive Information-Driven Branching</title><link>http://arxiv.org/abs/2605.09745v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09745v1</guid><description>Benjamin Patrick Evans et al. — arxiv:2605.09745 — Code LLM</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>CodeClinic: Evaluating Automation of Coding Skills for Clinical Reasoning Agents</title><link>http://arxiv.org/abs/2605.09675v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09675v1</guid><description>Timothy Ossowski et al. — arxiv:2605.09675 — Code LLM</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>PDEAgent-Bench: A Multi-Metric, Multi-Library Benchmark for PDE Solver Generation</title><link>http://arxiv.org/abs/2605.09636v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09636v1</guid><description>Zhen Hang et al. — arxiv:2605.09636 — Code LLM</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Crosslingual On-Policy Self-Distillation for Multilingual Reasoning</title><link>http://arxiv.org/abs/2605.09548v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09548v1</guid><description>Yihong Liu et al. — arxiv:2605.09548 — Multilingual NLP</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>AgentShield: Deception-based Compromise Detection for Tool-using LLM Agents</title><link>http://arxiv.org/abs/2605.11026v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.11026v1</guid><description>Yassin H. Rassul et al. — arxiv:2605.11026 — Multilingual NLP</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>MemPrivacy: Privacy-Preserving Personalized Memory Management for Edge-Cloud Agents</title><link>http://arxiv.org/abs/2605.09530v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09530v2</guid><description>Yining Chen et al. — arxiv:2605.09530 — Information Extraction</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>How to count clustered galaxies</title><link>http://arxiv.org/abs/2605.09248v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09248v1</guid><description>Yunting Wang et al. — arxiv:2605.09248 — Information Extraction</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>cantnlp@DravidianLangTech 2026: organic domain adaptation improves multi-class hope speech detection in Tulu</title><link>http://arxiv.org/abs/2605.09795v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09795v1</guid><description>Andrew Li et al. — arxiv:2605.09795 — Text Classification</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>Distilling 3D Spatial Reasoning into a Lightweight Vision-Language Model with CoT</title><link>http://arxiv.org/abs/2605.09719v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09719v1</guid><description>Alaa Asfour et al. — arxiv:2605.09719 — Question Answering</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>DeepTumorVQA: A Hierarchical 3D CT Benchmark for Stage-Wise Evaluation of Medical VLMs and Tool-Augmented Agents</title><link>http://arxiv.org/abs/2605.09679v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09679v1</guid><description>Yixiong Chen et al. — arxiv:2605.09679 — Question Answering</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>FinMoji: A Framework for Emoji-driven Sentiment Analysis in Financial Social Media</title><link>http://arxiv.org/abs/2605.09469v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09469v1</guid><description>Ahmed Mahrous et al. — arxiv:2605.09469 — Sentiment Analysis</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Oracle Poisoning: Corrupting Knowledge Graphs to Weaponise AI Agent Reasoning</title><link>http://arxiv.org/abs/2605.09822v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09822v1</guid><description>Ben Kereopa-Yorke et al. — arxiv:2605.09822 — Knowledge Graph</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>K12-KGraph: A Curriculum-Aligned Knowledge Graph for Benchmarking and Training Educational LLMs</title><link>http://arxiv.org/abs/2605.09635v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09635v1</guid><description>Hao Liang et al. — arxiv:2605.09635 — Knowledge Graph</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>LLM-Guided Monte Carlo Tree Search over Knowledge Graphs: Composing Mechanistic Explanations for Drug-Disease Pairs</title><link>http://arxiv.org/abs/2605.09542v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09542v1</guid><description>Rishabh Jakhar et al. — arxiv:2605.09542 — Knowledge Graph</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>EpiGraph: A Knowledge Graph and Benchmark for Evidence-Intensive Reasoning in Epilepsy</title><link>http://arxiv.org/abs/2605.09505v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09505v1</guid><description>Yuyang Dai et al. — arxiv:2605.09505 — Knowledge Graph</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>HOME-KGQA: A Benchmark Dataset for Multimodal Knowledge Graph Question Answering on Household Daily Activities</title><link>http://arxiv.org/abs/2605.09348v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09348v1</guid><description>Shusaku Egami et al. — arxiv:2605.09348 — Knowledge Graph</description><pubDate>Sun, 10 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>From Traditional Taggers to LLMs: A Comparative Study of POS Tagging for Medieval Romance Languages</title><link>http://arxiv.org/abs/2605.09147v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09147v1</guid><description>Matthias Schöffel et al. — arxiv:2605.09147 — NLP</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>The Art of the Jailbreak: Formulating Jailbreak Attacks for LLM Security Beyond Binary Scoring</title><link>http://arxiv.org/abs/2605.09225v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09225v1</guid><description>Ismail Hossain et al. — arxiv:2605.09225 — Alignment</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Learning the Preferences of a Learning Agent</title><link>http://arxiv.org/abs/2605.09217v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09217v1</guid><description>Karim Abdel Sadek et al. — arxiv:2605.09217 — Alignment</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>The Grounding Gap: How LLMs Anchor the Meaning of Abstract Concepts Differently from Humans</title><link>http://arxiv.org/abs/2605.08837v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08837v1</guid><description>Odysseas S. Chlapanis et al. — arxiv:2605.08837 — Alignment</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Compressed Video Aggregator: Content-driven Module for Efficient Micro-Video Recommendation</title><link>http://arxiv.org/abs/2605.08810v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08810v1</guid><description>Yang Xiao et al. — arxiv:2605.08810 — Alignment</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>The Art of the Jailbreak: Formulating Jailbreak Attacks for LLM Security Beyond Binary Scoring</title><link>http://arxiv.org/abs/2605.09225v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09225v1</guid><description>Ismail Hossain et al. — arxiv:2605.09225 — LLM Safety</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>AutoRedTrader: Autonomous Red Teaming of Trading Agents through Synthetic Misinformation Injection</title><link>http://arxiv.org/abs/2605.09185v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09185v1</guid><description>Zhiwei Liu et al. — arxiv:2605.09185 — LLM Safety</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Evaluating LLM-Generated Code: A Benchmark and Developer Study</title><link>http://arxiv.org/abs/2605.09059v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09059v1</guid><description>Joanna Szych et al. — arxiv:2605.09059 — Code LLM</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Using Semantic Distance to Estimate Uncertainty in LLM-Based Code Generation</title><link>http://arxiv.org/abs/2605.09023v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09023v1</guid><description>Weilin He et al. — arxiv:2605.09023 — Code LLM</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>MDGYM: Benchmarking AI Agents on Molecular Simulations</title><link>http://arxiv.org/abs/2605.08941v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08941v1</guid><description>Vinay Kumar et al. — arxiv:2605.08941 — Code LLM</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>PrepBench: How Far Are We from Natural-Language-Driven Data Preparation?</title><link>http://arxiv.org/abs/2605.08687v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08687v1</guid><description>Jingzhe Xu et al. — arxiv:2605.08687 — Code LLM</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>From Traditional Taggers to LLMs: A Comparative Study of POS Tagging for Medieval Romance Languages</title><link>http://arxiv.org/abs/2605.09147v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09147v1</guid><description>Matthias Schöffel et al. — arxiv:2605.09147 — Multilingual NLP</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Language-Conditioned Visual Grounding with CLIP Multilingual</title><link>http://arxiv.org/abs/2605.09060v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.09060v1</guid><description>J. de Curtò et al. — arxiv:2605.09060 — Multilingual NLP</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Improving Lexical Difficulty Prediction with Context-Aligned Contrastive Learning and Ridge Ensembling</title><link>http://arxiv.org/abs/2605.08950v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08950v1</guid><description>Wicaksono Leksono Muhamad et al. — arxiv:2605.08950 — Multilingual NLP</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>When More Parameters Hurt: Foundation Model Priors Amplify Worst-Client Disparity Under Extreme Federated Heterogeneity</title><link>http://arxiv.org/abs/2605.08992v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08992v1</guid><description>Kiran Naseer et al. — arxiv:2605.08992 — Text Classification</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>Training with Harnesses: On-Policy Harness Self-Distillation for Complex Reasoning</title><link>http://arxiv.org/abs/2605.08741v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08741v1</guid><description>Zhengyang Zhao et al. — arxiv:2605.08741 — Text Classification</description><pubDate>Sat, 09 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>Guidance Is Not a Hyperparameter: Learning Dynamic Control in Diffusion Language Models</title><link>http://arxiv.org/abs/2605.07701v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07701v1</guid><description>Fan Zhou et al. — arxiv:2605.07701 — NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Nürnberg NLP at PsyDefDetect: Multi-Axis Voter Ensembles for Psychological Defence Mechanism Classification</title><link>http://arxiv.org/abs/2605.07606v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07606v1</guid><description>Philipp Steigerwald et al. — arxiv:2605.07606 — NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Data Contamination in Neural Hieroglyphic Translation: A Reproducibility Study</title><link>http://arxiv.org/abs/2605.07453v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07453v1</guid><description>Ammar Toutou et al. — arxiv:2605.07453 — NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>SSP-based construction of evaluation-annotated data for fine-grained aspect-based sentiment analysis</title><link>http://arxiv.org/abs/2605.07446v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07446v1</guid><description>Suwon Choi et al. — arxiv:2605.07446 — NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>The Proxy Presumption: From Semantic Embeddings to Valid Social Measures</title><link>http://arxiv.org/abs/2605.07409v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07409v1</guid><description>Baishi Li et al. — arxiv:2605.07409 — NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Zero-Shot Neural Network Evaluation with Sample-Wise Activation Patterns</title><link>http://arxiv.org/abs/2605.07378v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07378v1</guid><description>Yameng Peng et al. — arxiv:2605.07378 — NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>LLMs Improving LLMs: Agentic Discovery for Test-Time Scaling</title><link>http://arxiv.org/abs/2605.08083v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08083v1</guid><description>Tong Zheng et al. — arxiv:2605.08083 — LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>VecCISC: Improving Confidence-Informed Self-Consistency with Reasoning Trace Clustering and Candidate Answer Selection</title><link>http://arxiv.org/abs/2605.08070v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08070v1</guid><description>James Petullo et al. — arxiv:2605.08070 — LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Empirical Bayes Rebiasing</title><link>http://arxiv.org/abs/2605.08069v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08069v1</guid><description>Wanyi Ling et al. — arxiv:2605.08069 — LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Flow-OPD: On-Policy Distillation for Flow Matching Models</title><link>http://arxiv.org/abs/2605.08063v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08063v1</guid><description>Zhen Fang et al. — arxiv:2605.08063 — LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Rubric-Grounded RL: Structured Judge Rewards for Generalizable Reasoning</title><link>http://arxiv.org/abs/2605.08061v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08061v1</guid><description>Manish Bhattarai et al. — arxiv:2605.08061 — LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>The Memory Curse: How Expanded Recall Erodes Cooperative Intent in LLM Agents</title><link>http://arxiv.org/abs/2605.08060v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08060v1</guid><description>Jiayuan Liu et al. — arxiv:2605.08060 — LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>CA-SQL: Complexity-Aware Inference Time Reasoning for Text-to-SQL via Exploration and Compute Budget Allocation</title><link>http://arxiv.org/abs/2605.08057v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08057v1</guid><description>James Petullo et al. — arxiv:2605.08057 — LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Towards Highly-Constrained Human Motion Generation with Retrieval-Guided Diffusion Noise Optimization</title><link>http://arxiv.org/abs/2605.08054v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08054v1</guid><description>Hanchao Liu et al. — arxiv:2605.08054 — LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Uncertainty-Aware Structured Data Extraction from Full CMR Reports via Distilled LLMs</title><link>http://arxiv.org/abs/2605.08045v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08045v1</guid><description>Yi Yu et al. — arxiv:2605.08045 — LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>ECNUClaw: A Learner-Profiled Intelligent Study Companion Framework for K-12 Personalized Education</title><link>http://arxiv.org/abs/2605.08040v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08040v1</guid><description>Yizhou Zhou et al. — arxiv:2605.08040 — LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>LLMs Improving LLMs: Agentic Discovery for Test-Time Scaling</title><link>http://arxiv.org/abs/2605.08083v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08083v1</guid><description>Tong Zheng et al. — arxiv:2605.08083 — LLM Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>The Memory Curse: How Expanded Recall Erodes Cooperative Intent in LLM Agents</title><link>http://arxiv.org/abs/2605.08060v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08060v1</guid><description>Jiayuan Liu et al. — arxiv:2605.08060 — LLM Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Towards Highly-Constrained Human Motion Generation with Retrieval-Guided Diffusion Noise Optimization</title><link>http://arxiv.org/abs/2605.08054v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08054v1</guid><description>Hanchao Liu et al. — arxiv:2605.08054 — LLM Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Reason to Play: Behavioral and Brain Alignment Between Frontier LRMs and Human Game Learners</title><link>http://arxiv.org/abs/2605.08019v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08019v1</guid><description>Botos Csaba et al. — arxiv:2605.08019 — LLM Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Collaborator or Assistnat? How AI Coding Agents Partition Work Across Pull Request Lifecycles</title><link>http://arxiv.org/abs/2605.08017v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08017v1</guid><description>Young Jo et al. — arxiv:2605.08017 — LLM Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Learning CLI Agents with Structured Action Credit under Selective Observation</title><link>http://arxiv.org/abs/2605.08013v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08013v1</guid><description>Haoyang Su et al. — arxiv:2605.08013 — LLM Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Interpreting Reinforcement Learning Agents with Susceptibilities</title><link>http://arxiv.org/abs/2605.08007v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08007v1</guid><description>Chris Elliott et al. — arxiv:2605.08007 — LLM Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Tool Calling is Linearly Readable and Steerable in Language Models</title><link>http://arxiv.org/abs/2605.07990v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07990v1</guid><description>Zekun Wu et al. — arxiv:2605.07990 — LLM Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Graph Representation Learning Augmented Model Manipulation on Federated Fine-Tuning of LLMs</title><link>http://arxiv.org/abs/2605.07961v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07961v1</guid><description>Hanlin Cai et al. — arxiv:2605.07961 — LLM Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Exploring a Virtual Pet to Provide Context Notifications in a Tourism Recommender System: a Pilot Study</title><link>http://arxiv.org/abs/2605.07960v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07960v1</guid><description>Patrícia Alves et al. — arxiv:2605.07960 — LLM Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>The Memory Curse: How Expanded Recall Erodes Cooperative Intent in LLM Agents</title><link>http://arxiv.org/abs/2605.08060v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08060v1</guid><description>Jiayuan Liu et al. — arxiv:2605.08060 — Multi-Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Reason to Play: Behavioral and Brain Alignment Between Frontier LRMs and Human Game Learners</title><link>http://arxiv.org/abs/2605.08019v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08019v1</guid><description>Botos Csaba et al. — arxiv:2605.08019 — Multi-Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Learning CLI Agents with Structured Action Credit under Selective Observation</title><link>http://arxiv.org/abs/2605.08013v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08013v1</guid><description>Haoyang Su et al. — arxiv:2605.08013 — Multi-Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Tool Calling is Linearly Readable and Steerable in Language Models</title><link>http://arxiv.org/abs/2605.07990v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07990v1</guid><description>Zekun Wu et al. — arxiv:2605.07990 — Multi-Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Exploring a Virtual Pet to Provide Context Notifications in a Tourism Recommender System: a Pilot Study</title><link>http://arxiv.org/abs/2605.07960v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07960v1</guid><description>Patrícia Alves et al. — arxiv:2605.07960 — Multi-Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>TraceFix: Repairing Agent Coordination Protocols with TLA+ Counterexamples</title><link>http://arxiv.org/abs/2605.07935v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07935v1</guid><description>Shuren Xia et al. — arxiv:2605.07935 — Multi-Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Many-to-Many Multi-Agent Pickup and Delivery</title><link>http://arxiv.org/abs/2605.07835v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07835v1</guid><description>Ethan Schneider et al. — arxiv:2605.07835 — Multi-Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>SCENE: Recognizing Social Norms and Sanctioning in Group Chats</title><link>http://arxiv.org/abs/2605.07823v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07823v1</guid><description>Mateusz Jacniacki et al. — arxiv:2605.07823 — Multi-Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Is a team only as strong as its weakest link? Quantifying the short-board effect with AI Agents</title><link>http://arxiv.org/abs/2605.07773v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07773v1</guid><description>Xin Xu et al. — arxiv:2605.07773 — Multi-Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Alternating Target-Path Planning for Scalable Multi-Agent Coordination</title><link>http://arxiv.org/abs/2605.07744v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07744v1</guid><description>Yu Kumagai et al. — arxiv:2605.07744 — Multi-Agent</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>FAVOR: Efficient Filter-Agnostic Vector ANNS Based on Selectivity-Aware Exclusion Distances</title><link>http://arxiv.org/abs/2605.07770v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07770v1</guid><description>Junjie Song et al. — arxiv:2605.07770 — RAG</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Characterizing and Mitigating False-Positive Bug Reports in the Linux Kernel</title><link>http://arxiv.org/abs/2605.07678v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07678v1</guid><description>Jiashuo Tian et al. — arxiv:2605.07678 — RAG</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Intent-Driven Semantic ID Generation for Grounded Conversational News Recommendation</title><link>http://arxiv.org/abs/2605.07613v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07613v1</guid><description>Hongyang Su et al. — arxiv:2605.07613 — RAG</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>LARAG: Link-Aware Retrieval Strategy for RAG Systems in Hyperlinked Technical Documentation</title><link>http://arxiv.org/abs/2605.07517v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07517v1</guid><description>Giorgia Bolognesi et al. — arxiv:2605.07517 — RAG</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>CSR: Infinite-Horizon Real-Time Policies with Massive Cached State Representations</title><link>http://arxiv.org/abs/2605.07325v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07325v1</guid><description>Robin Karlsson et al. — arxiv:2605.07325 — RAG</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>BioProVLA-Agent: An Affordable, Protocol-Driven, Vision-Enhanced VLA-Enabled Embodied Multi-Agent System with Closed-Loop-Capable Reasoning for Biological Laboratory Manipulation</title><link>http://arxiv.org/abs/2605.07306v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07306v1</guid><description>Zhaohui Du et al. — arxiv:2605.07306 — RAG</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>From Clouds to Hallucinations: Atmospheric Retrieval Hijacking in Remote Sensing Vision-Language RAG</title><link>http://arxiv.org/abs/2605.07273v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07273v1</guid><description>Jiaju Han et al. — arxiv:2605.07273 — RAG</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>MLAIRE: Multilingual Language-Aware Information Retrieval Evaluation Protocal</title><link>http://arxiv.org/abs/2605.07249v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07249v1</guid><description>Youngjoon Jang et al. — arxiv:2605.07249 — RAG</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Topic Is Not Agenda: A Citation-Community Audit of Text Embeddings</title><link>http://arxiv.org/abs/2605.07158v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07158v1</guid><description>Junseon Yoo et al. — arxiv:2605.07158 — RAG</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>From Standard English to Singlish: A Retrieval-Augmented Approach for Code-Switched Creole Generation in Large Language Models</title><link>http://arxiv.org/abs/2605.07132v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07132v1</guid><description>Foong Ming Lai et al. — arxiv:2605.07132 — RAG</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>The Memory Curse: How Expanded Recall Erodes Cooperative Intent in LLM Agents</title><link>http://arxiv.org/abs/2605.08060v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08060v1</guid><description>Jiayuan Liu et al. — arxiv:2605.08060 — Reasoning</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>CA-SQL: Complexity-Aware Inference Time Reasoning for Text-to-SQL via Exploration and Compute Budget Allocation</title><link>http://arxiv.org/abs/2605.08057v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08057v1</guid><description>James Petullo et al. — arxiv:2605.08057 — Reasoning</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Reason to Play: Behavioral and Brain Alignment Between Frontier LRMs and Human Game Learners</title><link>http://arxiv.org/abs/2605.08019v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08019v1</guid><description>Botos Csaba et al. — arxiv:2605.08019 — Reasoning</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Abductive Reasoning with Probabilistic Commonsense</title><link>http://arxiv.org/abs/2605.08011v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08011v1</guid><description>Joseph Cotnareanu et al. — arxiv:2605.08011 — Reasoning</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Similar Pattern Annotation via Retrieval Knowledge for LLM-Based Test Code Fault Localization</title><link>http://arxiv.org/abs/2605.07957v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07957v1</guid><description>Golnaz Gharachorlu et al. — arxiv:2605.07957 — Reasoning</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>CoCoReviewBench: A Completeness- and Correctness-Oriented Benchmark for AI Reviewers</title><link>http://arxiv.org/abs/2605.07905v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07905v1</guid><description>Hexuan Deng et al. — arxiv:2605.07905 — Reasoning</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Melding LLM and temporal logic for reliable human-swarm collaboration in complex scenarios</title><link>http://arxiv.org/abs/2605.07877v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07877v1</guid><description>Junfeng Chen et al. — arxiv:2605.07877 — Reasoning</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Video Understanding Reward Modeling: A Robust Benchmark and Performant Reward Models</title><link>http://arxiv.org/abs/2605.07872v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07872v1</guid><description>Yuancheng Wei et al. — arxiv:2605.07872 — Reasoning</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Prune-OPD: Efficient and Reliable On-Policy Distillation for Long-Horizon Reasoning</title><link>http://arxiv.org/abs/2605.07804v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07804v1</guid><description>Zhicheng Yang et al. — arxiv:2605.07804 — Reasoning</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Tracing Uncertainty in Language Model &quot;Reasoning&quot;</title><link>http://arxiv.org/abs/2605.07776v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07776v1</guid><description>Nils Grünefeld et al. — arxiv:2605.07776 — Reasoning</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>AgentEscapeBench: Evaluating Out-of-Domain Tool-Grounded Reasoning in LLM Agents</title><link>http://arxiv.org/abs/2605.07926v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07926v1</guid><description>Zhengkang Guo et al. — arxiv:2605.07926 — Tool Use</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>SARC: A Governance-by-Architecture Framework for Agentic AI Systems</title><link>http://arxiv.org/abs/2605.07728v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07728v1</guid><description>Gaston Besanson et al. — arxiv:2605.07728 — Tool Use</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>InterLV-Search: Benchmarking Interleaved Multimodal Agentic Search</title><link>http://arxiv.org/abs/2605.07510v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07510v1</guid><description>Bohan Hou et al. — arxiv:2605.07510 — Tool Use</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>FlightSense: An End-to-End MLOps Platform for Real-Time Flight Delay Prediction via Rotation-Chain Propagation Features and Agentic Conversational AI</title><link>http://arxiv.org/abs/2605.07364v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07364v1</guid><description>Aditi J. Shelke et al. — arxiv:2605.07364 — Tool Use</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Signal Reshaping for GRPO in Weak-Feedback Agentic Code Repair</title><link>http://arxiv.org/abs/2605.07276v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07276v1</guid><description>Jia Li et al. — arxiv:2605.07276 — Tool Use</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>MIPIAD: Multilingual Indirect Prompt Injection Attack Defense with Qwen -- TF-IDF Hybrid and Meta-Ensemble Learning</title><link>http://arxiv.org/abs/2605.07269v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07269v1</guid><description>Al Muhit Muhtadi et al. — arxiv:2605.07269 — Tool Use</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Can Agents Price a Reaction? Evaluating LLMs on Chemical Cost Reasoning</title><link>http://arxiv.org/abs/2605.07251v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07251v1</guid><description>Yuyang Wu et al. — arxiv:2605.07251 — Tool Use</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>HyperEyes: Dual-Grained Efficiency-Aware Reinforcement Learning for Parallel Multimodal Search Agents</title><link>http://arxiv.org/abs/2605.07177v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07177v1</guid><description>Guankai Li et al. — arxiv:2605.07177 — Tool Use</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Switchcraft: AI Model Router for Agentic Tool Calling</title><link>http://arxiv.org/abs/2605.07112v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07112v1</guid><description>Sharad Agarwal et al. — arxiv:2605.07112 — Tool Use</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Proxy3D: Efficient 3D Representations for Vision-Language Models via Semantic Clustering and Alignment</title><link>http://arxiv.org/abs/2605.08064v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08064v1</guid><description>Jerry Jiang et al. — arxiv:2605.08064 — Multimodal LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Object Hallucination-Free Reinforcement Unlearning for Vision-Language Models</title><link>http://arxiv.org/abs/2605.08031v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08031v1</guid><description>Kaidi Jia et al. — arxiv:2605.08031 — Multimodal LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>STARFlow2: Bridging Language Models and Normalizing Flows for Unified Multimodal Generation</title><link>http://arxiv.org/abs/2605.08029v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08029v1</guid><description>Ying Shen et al. — arxiv:2605.08029 — Multimodal LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>SphereVAD: Training-Free Video Anomaly Detection via Geodesic Inference on the Unit Hypersphere</title><link>http://arxiv.org/abs/2605.08003v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08003v1</guid><description>Chao Huang et al. — arxiv:2605.08003 — Multimodal LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>MedVIGIL: Evaluating Trustworthy Medical VLMs Under Broken Visual Evidence</title><link>http://arxiv.org/abs/2605.07919v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07919v1</guid><description>Hanqi Jiang et al. — arxiv:2605.07919 — Multimodal LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Anisotropic Modality Align</title><link>http://arxiv.org/abs/2605.07825v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07825v1</guid><description>Xiaomin Yu et al. — arxiv:2605.07825 — Multimodal LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>GazeVLM: Active Vision via Internal Attention Control for Multimodal Reasoning</title><link>http://arxiv.org/abs/2605.07817v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07817v1</guid><description>Brown Ebouky et al. — arxiv:2605.07817 — Multimodal LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>SARA: Semantically Adaptive Relational Alignment for Video Diffusion Models</title><link>http://arxiv.org/abs/2605.07800v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07800v1</guid><description>Jiesong Lian et al. — arxiv:2605.07800 — Multimodal LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>RuleSafe-VL: Evaluating Rule-Conditioned Decision Reasoning in Vision-Language Content Moderation</title><link>http://arxiv.org/abs/2605.07760v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07760v1</guid><description>Zhifeng Lu et al. — arxiv:2605.07760 — Multimodal LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Operating Within the Operational Design Domain: Zero-Shot Perception with Vision-Language Models</title><link>http://arxiv.org/abs/2605.07649v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07649v1</guid><description>Berkehan Ünal et al. — arxiv:2605.07649 — Multimodal LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Learning CLI Agents with Structured Action Credit under Selective Observation</title><link>http://arxiv.org/abs/2605.08013v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08013v1</guid><description>Haoyang Su et al. — arxiv:2605.08013 — Long Context</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Ask Early, Ask Late, Ask Right: When Does Clarification Timing Matter for Long-Horizon Agents?</title><link>http://arxiv.org/abs/2605.07937v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07937v1</guid><description>Anmol Gulati et al. — arxiv:2605.07937 — Long Context</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>What if AI systems weren&apos;t chatbots?</title><link>http://arxiv.org/abs/2605.07896v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07896v1</guid><description>Sourojit Ghosh et al. — arxiv:2605.07896 — Long Context</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Melding LLM and temporal logic for reliable human-swarm collaboration in complex scenarios</title><link>http://arxiv.org/abs/2605.07877v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07877v1</guid><description>Junfeng Chen et al. — arxiv:2605.07877 — Long Context</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Prune-OPD: Efficient and Reliable On-Policy Distillation for Long-Horizon Reasoning</title><link>http://arxiv.org/abs/2605.07804v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07804v1</guid><description>Zhicheng Yang et al. — arxiv:2605.07804 — Long Context</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>An Efficient Hybrid Sparse Attention with CPU-GPU Parallelism for Long-Context Inference</title><link>http://arxiv.org/abs/2605.07719v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07719v1</guid><description>Feiyu Yao et al. — arxiv:2605.07719 — Long Context</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Spatiotemporal Trust Evaluation for Collaborator Selection via Customized GNN-Mamba</title><link>http://arxiv.org/abs/2605.07658v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07658v1</guid><description>Botao Zhu et al. — arxiv:2605.07658 — Long Context</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>HexiSeq: Accommodating Long Context Training of LLMs over Heterogeneous Hardware</title><link>http://arxiv.org/abs/2605.07569v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07569v1</guid><description>Yan Liang et al. — arxiv:2605.07569 — Long Context</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>EditTransfer++: Toward Faithful and Efficient Visual-Prompt-Guided Image Editing</title><link>http://arxiv.org/abs/2605.07455v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07455v1</guid><description>Lan Chen et al. — arxiv:2605.07455 — Long Context</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>RcLLM: Accelerating Generative Recommendation via Beyond-Prefix KV Caching</title><link>http://arxiv.org/abs/2605.07443v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07443v1</guid><description>Zhan Zhao et al. — arxiv:2605.07443 — Long Context</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>The Memory Curse: How Expanded Recall Erodes Cooperative Intent in LLM Agents</title><link>http://arxiv.org/abs/2605.08060v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08060v1</guid><description>Jiayuan Liu et al. — arxiv:2605.08060 — LLM Efficiency</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Convergent Stochastic Training of Attention and Understanding LoRA</title><link>http://arxiv.org/abs/2605.07959v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07959v1</guid><description>Zhengkai Sun et al. — arxiv:2605.07959 — LLM Efficiency</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Energy-Resolved Quantum Geometry from Středa Response: Driven-Dissipative Bosonic Lattices and Disordered Systems</title><link>http://arxiv.org/abs/2605.07948v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07948v1</guid><description>Anaïs Defossez et al. — arxiv:2605.07948 — LLM Efficiency</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>A Fully Tunable Ultra-Low Power Current-Mode Memory Cell in Standard CMOS Technology</title><link>http://arxiv.org/abs/2605.07936v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07936v1</guid><description>Arthur Fyon et al. — arxiv:2605.07936 — LLM Efficiency</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>One Token Per Frame: Reconsidering Visual Bandwidth in World Models for VLA Policy</title><link>http://arxiv.org/abs/2605.07931v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07931v1</guid><description>Zuojin Tang et al. — arxiv:2605.07931 — LLM Efficiency</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>BeeVe: Unsupervised Acoustic State Discovery in Honey Bee Buzzing</title><link>http://arxiv.org/abs/2605.07903v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07903v1</guid><description>Hamze Hammami et al. — arxiv:2605.07903 — LLM Efficiency</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Bulk-mediated reflection of chirality-protected surface spin waves</title><link>http://arxiv.org/abs/2605.07875v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07875v1</guid><description>Vitaliy I. Vasyuchka et al. — arxiv:2605.07875 — LLM Efficiency</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>MatryoshkaLoRA: Learning Accurate Hierarchical Low-Rank Representations for LLM Fine-Tuning</title><link>http://arxiv.org/abs/2605.07850v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07850v1</guid><description>Ionut-Vlad Modoranu et al. — arxiv:2605.07850 — LLM Efficiency</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Measuring and Mitigating the Distributional Gap Between Real and Simulated User Behaviors</title><link>http://arxiv.org/abs/2605.07847v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07847v1</guid><description>Shuhaib Mehri et al. — arxiv:2605.07847 — LLM Efficiency</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Approximation-Free Differentiable Oblique Decision Trees</title><link>http://arxiv.org/abs/2605.07837v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07837v1</guid><description>Subrat Prasad Panda et al. — arxiv:2605.07837 — LLM Efficiency</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Beyond Pairs: Your Language Model is Secretly Optimizing a Preference Graph</title><link>http://arxiv.org/abs/2605.08037v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08037v1</guid><description>Ning Liu et al. — arxiv:2605.08037 — Alignment</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Interpreting Reinforcement Learning Agents with Susceptibilities</title><link>http://arxiv.org/abs/2605.08007v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08007v1</guid><description>Chris Elliott et al. — arxiv:2605.08007 — Alignment</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Diffusion-APO: Trajectory-Aware Direct Preference Alignment for Video Diffusion Transformers</title><link>http://arxiv.org/abs/2605.07503v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07503v1</guid><description>Jingyuan Zhu et al. — arxiv:2605.07503 — Alignment</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Topology-Enhanced Alignment for Large Language Models: Trajectory Topology Loss and Topological Preference Optimization</title><link>http://arxiv.org/abs/2605.07172v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07172v1</guid><description>Yurui Pan et al. — arxiv:2605.07172 — Alignment</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Dr. Post-Training: A Data Regularization Perspective on LLM Post-Training</title><link>http://arxiv.org/abs/2605.07063v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07063v1</guid><description>Pingbang Hu et al. — arxiv:2605.07063 — Alignment</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>VecCISC: Improving Confidence-Informed Self-Consistency with Reasoning Trace Clustering and Candidate Answer Selection</title><link>http://arxiv.org/abs/2605.08070v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08070v1</guid><description>James Petullo et al. — arxiv:2605.08070 — Hallucination</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Object Hallucination-Free Reinforcement Unlearning for Vision-Language Models</title><link>http://arxiv.org/abs/2605.08031v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08031v1</guid><description>Kaidi Jia et al. — arxiv:2605.08031 — Hallucination</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Position: Mechanistic Interpretability Must Disclose Identification Assumptions for Causal Claims</title><link>http://arxiv.org/abs/2605.08012v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08012v1</guid><description>Zezheng Lin et al. — arxiv:2605.08012 — Hallucination</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Delta-Adapter: Scalable Exemplar-Based Image Editing with Single-Pair Supervision</title><link>http://arxiv.org/abs/2605.07940v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07940v1</guid><description>Jiacheng Chen et al. — arxiv:2605.07940 — Hallucination</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>CoCoReviewBench: A Completeness- and Correctness-Oriented Benchmark for AI Reviewers</title><link>http://arxiv.org/abs/2605.07905v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07905v1</guid><description>Hexuan Deng et al. — arxiv:2605.07905 — Hallucination</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Measuring and Mitigating the Distributional Gap Between Real and Simulated User Behaviors</title><link>http://arxiv.org/abs/2605.07847v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07847v1</guid><description>Shuhaib Mehri et al. — arxiv:2605.07847 — Hallucination</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>GazeVLM: Active Vision via Internal Attention Control for Multimodal Reasoning</title><link>http://arxiv.org/abs/2605.07817v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07817v1</guid><description>Brown Ebouky et al. — arxiv:2605.07817 — Hallucination</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Beam-Aware Radio Map Estimation With Physics-Consistent Parametric Modeling for Unknown Multiple Satellites</title><link>http://arxiv.org/abs/2605.07763v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07763v1</guid><description>Xiucheng Wang et al. — arxiv:2605.07763 — Hallucination</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Securing the Dark Matter: A Semantic-Enhanced Neuro-Symbolic Framework for Supply Chain Analysis of Opaque Industrial Software</title><link>http://arxiv.org/abs/2605.07737v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07737v1</guid><description>Bowei Ning et al. — arxiv:2605.07737 — Hallucination</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>LLM hallucinations in the wild: Large-scale evidence from non-existent citations</title><link>http://arxiv.org/abs/2605.07723v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07723v1</guid><description>Zhenyue Zhao et al. — arxiv:2605.07723 — Hallucination</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>GLiGuard: Schema-Conditioned Classification for LLM Safeguard</title><link>http://arxiv.org/abs/2605.07982v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07982v1</guid><description>Urchade Zaratiana et al. — arxiv:2605.07982 — LLM Safety</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Fortifying Time Series: DTW-Certified Robust Anomaly Detection</title><link>http://arxiv.org/abs/2605.07690v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07690v1</guid><description>Shijie Liu et al. — arxiv:2605.07690 — LLM Safety</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Beyond Defenses: Manifold-Aligned Regularization for Intrinsic 3D Point Cloud Robustness</title><link>http://arxiv.org/abs/2605.07590v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07590v1</guid><description>Pedro Alonso et al. — arxiv:2605.07590 — LLM Safety</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Uncovering Hidden Systematics in Neural Network Models for High Energy Physics</title><link>http://arxiv.org/abs/2605.07470v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07470v1</guid><description>Lucie Flek et al. — arxiv:2605.07470 — LLM Safety</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Sparse Autoencoders as Plug-and-Play Firewalls for Adversarial Attack Detection in VLMs</title><link>http://arxiv.org/abs/2605.07447v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07447v1</guid><description>Hao Wang et al. — arxiv:2605.07447 — LLM Safety</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>OrchJail: Jailbreaking Tool-Calling Text-to-Image Agents by Orchestration-Guided Fuzzing</title><link>http://arxiv.org/abs/2605.07414v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07414v1</guid><description>Jianming Chen et al. — arxiv:2605.07414 — LLM Safety</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>GPO-V: Jailbreak Diffusion Vision Language Model by Global Probability Optimization</title><link>http://arxiv.org/abs/2605.07399v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07399v1</guid><description>Yu Pan et al. — arxiv:2605.07399 — LLM Safety</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Hard to Read, Easy to Jailbreak: How Visual Degradation Bypasses MLLM Safety Alignment</title><link>http://arxiv.org/abs/2605.07250v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07250v1</guid><description>Zhixue Song et al. — arxiv:2605.07250 — LLM Safety</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Rubric-Grounded RL: Structured Judge Rewards for Generalizable Reasoning</title><link>http://arxiv.org/abs/2605.08061v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08061v1</guid><description>Manish Bhattarai et al. — arxiv:2605.08061 — LLM Evaluation</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Abductive Reasoning with Probabilistic Commonsense</title><link>http://arxiv.org/abs/2605.08011v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08011v1</guid><description>Joseph Cotnareanu et al. — arxiv:2605.08011 — LLM Evaluation</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Asymptotically Log-Optimal Bayes-Assisted Confidence Sequences for Bounded Means</title><link>http://arxiv.org/abs/2605.07964v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07964v1</guid><description>Valentin Kilian et al. — arxiv:2605.07964 — LLM Evaluation</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>DRIP-R: A Benchmark for Decision-Making and Reasoning Under Real-World Policy Ambiguity in the Retail Domain</title><link>http://arxiv.org/abs/2605.07699v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07699v1</guid><description>Hsuvas Borkakoty et al. — arxiv:2605.07699 — LLM Evaluation</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>FactoryBench: Evaluating Industrial Machine Understanding</title><link>http://arxiv.org/abs/2605.07675v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07675v1</guid><description>Yanis Merzouki et al. — arxiv:2605.07675 — LLM Evaluation</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>MAVEN: Multi-Agent Verification-Elaboration Network with In-Step Epistemic Auditing</title><link>http://arxiv.org/abs/2605.07646v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07646v1</guid><description>Yinsheng Yao et al. — arxiv:2605.07646 — LLM Evaluation</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Efficient Data Selection for Multimodal Models via Incremental Optimization Utility</title><link>http://arxiv.org/abs/2605.07488v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07488v1</guid><description>Jinhao Jing et al. — arxiv:2605.07488 — LLM Evaluation</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>GameGen-Verifier: Parallel Keypoint-Based Verification for LLM-Generated Games via Runtime State Injection</title><link>http://arxiv.org/abs/2605.07442v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07442v1</guid><description>Chaobo Jia et al. — arxiv:2605.07442 — LLM Evaluation</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Unsolvability Ceiling in Multi-LLM Routing: An Empirical Study of Evaluation Artifacts</title><link>http://arxiv.org/abs/2605.07395v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07395v1</guid><description>Saloni Garg et al. — arxiv:2605.07395 — LLM Evaluation</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Can Agents Price a Reaction? Evaluating LLMs on Chemical Cost Reasoning</title><link>http://arxiv.org/abs/2605.07251v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07251v1</guid><description>Yuyang Wu et al. — arxiv:2605.07251 — LLM Evaluation</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Beyond Pairs: Your Language Model is Secretly Optimizing a Preference Graph</title><link>http://arxiv.org/abs/2605.08037v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08037v1</guid><description>Ning Liu et al. — arxiv:2605.08037 — Code LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>SimCT: Recovering Lost Supervision for Cross-Tokenizer On-Policy Distillation</title><link>http://arxiv.org/abs/2605.07711v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07711v1</guid><description>Jie Sun et al. — arxiv:2605.07711 — Code LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Can LLMs Solve Science or Just Write Code? Evaluating Quantum Solver Generation</title><link>http://arxiv.org/abs/2605.07525v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07525v1</guid><description>Luciano Baresi et al. — arxiv:2605.07525 — Code LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>GameGen-Verifier: Parallel Keypoint-Based Verification for LLM-Generated Games via Runtime State Injection</title><link>http://arxiv.org/abs/2605.07442v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07442v1</guid><description>Chaobo Jia et al. — arxiv:2605.07442 — Code LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Mean-Pooled Cosine Similarity is Not Length-Invariant: Theory and Cross-Domain Evidence for a Length-Invariant Alternative</title><link>http://arxiv.org/abs/2605.07345v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07345v1</guid><description>Sibayan Mitra et al. — arxiv:2605.07345 — Code LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Mage: Multi-Axis Evaluation of LLM-Generated Executable Game Scenes Beyond Compile-Pass Rate</title><link>http://arxiv.org/abs/2605.07342v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07342v1</guid><description>Hugh Xuechen Liu et al. — arxiv:2605.07342 — Code LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>PaT: Planning-after-Trial for Efficient Test-Time Code Generation</title><link>http://arxiv.org/abs/2605.07248v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07248v1</guid><description>Youngsik Yoon et al. — arxiv:2605.07248 — Code LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Coupling Models for One-Step Discrete Generation</title><link>http://arxiv.org/abs/2605.07193v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07193v1</guid><description>Fred Zhangzhi Peng et al. — arxiv:2605.07193 — Code LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>RepoZero: Can LLMs Generate a Code Repository from Scratch?</title><link>http://arxiv.org/abs/2605.07122v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07122v1</guid><description>Zhaoxi Zhang et al. — arxiv:2605.07122 — Code LLM</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>MedAction: Towards Active Multi-turn Clinical Diagnostic LLMs</title><link>http://arxiv.org/abs/2605.07305v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07305v1</guid><description>Hsin-Ling Hsu et al. — arxiv:2605.07305 — Medical NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Medical NLP</category></item><item><title>MedExAgent: Training LLM Agents to Ask, Examine, and Diagnose in Noisy Clinical Environments</title><link>http://arxiv.org/abs/2605.07058v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07058v1</guid><description>Yicheng Gao et al. — arxiv:2605.07058 — Medical NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Medical NLP</category></item><item><title>Boosting Automatic Java-to-Cangjie Translation with Multi-Stage LLM Training and Error Repair</title><link>http://arxiv.org/abs/2605.07403v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07403v1</guid><description>Xinyue Liang et al. — arxiv:2605.07403 — Multilingual NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Mean-Pooled Cosine Similarity is Not Length-Invariant: Theory and Cross-Domain Evidence for a Length-Invariant Alternative</title><link>http://arxiv.org/abs/2605.07345v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07345v1</guid><description>Sibayan Mitra et al. — arxiv:2605.07345 — Multilingual NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>MIPIAD: Multilingual Indirect Prompt Injection Attack Defense with Qwen -- TF-IDF Hybrid and Meta-Ensemble Learning</title><link>http://arxiv.org/abs/2605.07269v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07269v1</guid><description>Al Muhit Muhtadi et al. — arxiv:2605.07269 — Multilingual NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>MLAIRE: Multilingual Language-Aware Information Retrieval Evaluation Protocal</title><link>http://arxiv.org/abs/2605.07249v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07249v1</guid><description>Youngjoon Jang et al. — arxiv:2605.07249 — Multilingual NLP</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>GRaSp: Automatic Example Optimization for In-Context Learning in Low-Data Tasks</title><link>http://arxiv.org/abs/2605.07454v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07454v1</guid><description>Simen Bihaug-Frøyland et al. — arxiv:2605.07454 — Named Entity Recognition</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Named Entity Recognition</category></item><item><title>Learning CLI Agents with Structured Action Credit under Selective Observation</title><link>http://arxiv.org/abs/2605.08013v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08013v1</guid><description>Haoyang Su et al. — arxiv:2605.08013 — Information Extraction</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>TCMIIES: A Browser-Based LLM-Powered Intelligent Information Extraction System for Academic Literature</title><link>http://arxiv.org/abs/2605.07507v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07507v1</guid><description>Hanqing Zhao et al. — arxiv:2605.07507 — Information Extraction</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>CapCLIP: A Vision-Language Representation Alignment Approach for Wireless Capsule Endoscopy Analysis</title><link>http://arxiv.org/abs/2605.08493v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08493v1</guid><description>Haroon Wahab et al. — arxiv:2605.08493 — Text Classification</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>Conformal Path Reasoning: Trustworthy Knowledge Graph Question Answering via Path-Level Calibration</title><link>http://arxiv.org/abs/2605.08077v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08077v1</guid><description>Shuhang Lin et al. — arxiv:2605.08077 — Question Answering</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Proxy3D: Efficient 3D Representations for Vision-Language Models via Semantic Clustering and Alignment</title><link>http://arxiv.org/abs/2605.08064v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08064v1</guid><description>Jerry Jiang et al. — arxiv:2605.08064 — Question Answering</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>How Value Induction Reshapes LLM Behaviour</title><link>http://arxiv.org/abs/2605.07925v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07925v1</guid><description>Arnav Arora et al. — arxiv:2605.07925 — Question Answering</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Beyond GSD-as-Token: Continuous Scale Conditioning for Remote Sensing VLMs</title><link>http://arxiv.org/abs/2605.07562v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07562v1</guid><description>Song Zhang et al. — arxiv:2605.07562 — Question Answering</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>BalCapRL: A Balanced Framework for RL-Based MLLM Image Captioning</title><link>http://arxiv.org/abs/2605.07394v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07394v1</guid><description>Shaokai Ye et al. — arxiv:2605.07394 — Question Answering</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>MIPIAD: Multilingual Indirect Prompt Injection Attack Defense with Qwen -- TF-IDF Hybrid and Meta-Ensemble Learning</title><link>http://arxiv.org/abs/2605.07269v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07269v1</guid><description>Al Muhit Muhtadi et al. — arxiv:2605.07269 — Question Answering</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Beyond Reasoning: Reinforcement Learning Unlocks Parametric Knowledge in LLMs</title><link>http://arxiv.org/abs/2605.07153v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07153v1</guid><description>Wanli Yang et al. — arxiv:2605.07153 — Question Answering</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Beyond LoRA vs. Full Fine-Tuning: Gradient-Guided Optimizer Routing for LLM Adaptation</title><link>http://arxiv.org/abs/2605.07111v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07111v1</guid><description>Haozhan Tang et al. — arxiv:2605.07111 — Question Answering</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Self-Consolidating Language Models: Continual Knowledge Incorporation from Context</title><link>http://arxiv.org/abs/2605.07076v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07076v1</guid><description>Zekun Wang et al. — arxiv:2605.07076 — Question Answering</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>ModelLens: Finding the Best for Your Task from Myriads of Models</title><link>http://arxiv.org/abs/2605.07075v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07075v1</guid><description>Rui Cai et al. — arxiv:2605.07075 — Question Answering</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Hybrid TF--IDF Logistic Regression and MLP Neural Baseline for Indonesian Three-Class Sentiment Analysis on Social Media Text</title><link>http://arxiv.org/abs/2605.07793v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07793v1</guid><description>Allya Nurul Islami Pasha et al. — arxiv:2605.07793 — Sentiment Analysis</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>SSP-based construction of evaluation-annotated data for fine-grained aspect-based sentiment analysis</title><link>http://arxiv.org/abs/2605.07446v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07446v1</guid><description>Suwon Choi et al. — arxiv:2605.07446 — Sentiment Analysis</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Conformal Path Reasoning: Trustworthy Knowledge Graph Question Answering via Path-Level Calibration</title><link>http://arxiv.org/abs/2605.08077v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.08077v1</guid><description>Shuhang Lin et al. — arxiv:2605.08077 — Knowledge Graph</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Securing the Dark Matter: A Semantic-Enhanced Neuro-Symbolic Framework for Supply Chain Analysis of Opaque Industrial Software</title><link>http://arxiv.org/abs/2605.07737v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07737v1</guid><description>Bowei Ning et al. — arxiv:2605.07737 — Knowledge Graph</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>TRACE: Tourism Recommendation with Accountable Citation Evidence</title><link>http://arxiv.org/abs/2605.07677v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07677v1</guid><description>Zixu Zhao et al. — arxiv:2605.07677 — Knowledge Graph</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Tacit Knowledge Extraction via Logic Augmented Generation and Active Inference</title><link>http://arxiv.org/abs/2605.07639v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07639v1</guid><description>Lorenzo Lamazzi et al. — arxiv:2605.07639 — Knowledge Graph</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>DCGL: Dual-Channel Graph Learning with Large Language Models for Knowledge-Aware Recommendation</title><link>http://arxiv.org/abs/2605.07314v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07314v1</guid><description>Xinchi Zou et al. — arxiv:2605.07314 — Knowledge Graph</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>MedAction: Towards Active Multi-turn Clinical Diagnostic LLMs</title><link>http://arxiv.org/abs/2605.07305v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07305v1</guid><description>Hsin-Ling Hsu et al. — arxiv:2605.07305 — Knowledge Graph</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>AdaTKG: Adaptive Memory for Temporal Knowledge Graph Reasoning</title><link>http://arxiv.org/abs/2605.07121v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07121v1</guid><description>Seunghan Lee et al. — arxiv:2605.07121 — Knowledge Graph</description><pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Who and What? Using Linguistic Features and Annotator Characteristics to Analyze Annotation Variation</title><link>http://arxiv.org/abs/2605.06318v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06318v1</guid><description>Maximilian Maurer et al. — arxiv:2605.06318 — NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Systematic Evaluation of Large Language Models for Post-Discharge Clinical Action Extraction</title><link>http://arxiv.org/abs/2605.06191v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06191v1</guid><description>Shivali Dalmia et al. — arxiv:2605.06191 — NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Visual Fingerprints for LLM Generation Comparison</title><link>http://arxiv.org/abs/2605.06054v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06054v1</guid><description>Amal Alnouri et al. — arxiv:2605.06054 — NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>FastOmniTMAE: Parallel Clause Learning for Scalable and Hardware-Efficient Tsetlin Embeddings</title><link>http://arxiv.org/abs/2605.06982v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06982v1</guid><description>Ahmed K. Kadhim et al. — arxiv:2605.06982 — NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>MultiSoc-4D: A Benchmark for Diagnosing Instruction-Induced Label Collapse in Closed-Set LLM Annotation of Bengali Social Media</title><link>http://arxiv.org/abs/2605.06940v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06940v1</guid><description>Souvik Pramanik et al. — arxiv:2605.06940 — NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Reflections and New Directions for Human-Centered Large Language Models</title><link>http://arxiv.org/abs/2605.06901v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06901v1</guid><description>Caleb Ziems et al. — arxiv:2605.06901 — NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>TajPersLexon: A Tajik-Persian Lexical Resource and Hybrid Model for Cross-Script Low-Resource NLP</title><link>http://arxiv.org/abs/2605.06886v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06886v1</guid><description>Mullosharaf K. Arabov et al. — arxiv:2605.06886 — NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>EMO: Pretraining Mixture of Experts for Emergent Modularity</title><link>http://arxiv.org/abs/2605.06663v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06663v1</guid><description>Ryan Wang et al. — arxiv:2605.06663 — LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Verifier-Backed Hard Problem Generation for Mathematical Reasoning</title><link>http://arxiv.org/abs/2605.06660v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06660v1</guid><description>Yuhang Lai et al. — arxiv:2605.06660 — LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Why Global LLM Leaderboards Are Misleading: Small Portfolios for Heterogeneous Supervised ML</title><link>http://arxiv.org/abs/2605.06656v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06656v1</guid><description>Jai Moondra et al. — arxiv:2605.06656 — LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Optimizer-Model Consistency: Full Finetuning with the Same Optimizer as Pretraining Forgets Less</title><link>http://arxiv.org/abs/2605.06654v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06654v1</guid><description>Yuxing Liu et al. — arxiv:2605.06654 — LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>When No Benchmark Exists: Validating Comparative LLM Safety Scoring Without Ground-Truth Labels</title><link>http://arxiv.org/abs/2605.06652v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06652v1</guid><description>Sushant Gautam et al. — arxiv:2605.06652 — LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Beyond Negative Rollouts: Positive-Only Policy Optimization with Implicit Negative Gradients</title><link>http://arxiv.org/abs/2605.06650v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06650v1</guid><description>Mingwei Xu et al. — arxiv:2605.06650 — LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Superintelligent Retrieval Agent: The Next Frontier of Information Retrieval</title><link>http://arxiv.org/abs/2605.06647v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06647v1</guid><description>Zeyu Yang et al. — arxiv:2605.06647 — LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>StraTA: Incentivizing Agentic Reinforcement Learning with Strategic Trajectory Abstraction</title><link>http://arxiv.org/abs/2605.06642v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06642v1</guid><description>Xiangyuan Xue et al. — arxiv:2605.06642 — LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>GlazyBench: A Benchmark for Ceramic Glaze Property Prediction and Image Generation</title><link>http://arxiv.org/abs/2605.06641v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06641v1</guid><description>Ziyu Zhai et al. — arxiv:2605.06641 — LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Can RL Teach Long-Horizon Reasoning to LLMs? Expressiveness Is Key</title><link>http://arxiv.org/abs/2605.06638v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06638v1</guid><description>Tianle Wang et al. — arxiv:2605.06638 — LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>BAMI: Training-Free Bias Mitigation in GUI Grounding</title><link>http://arxiv.org/abs/2605.06664v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06664v1</guid><description>Borui Zhang et al. — arxiv:2605.06664 — LLM Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>AI Co-Mathematician: Accelerating Mathematicians with Agentic AI</title><link>http://arxiv.org/abs/2605.06651v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06651v1</guid><description>Daniel Zheng et al. — arxiv:2605.06651 — LLM Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Superintelligent Retrieval Agent: The Next Frontier of Information Retrieval</title><link>http://arxiv.org/abs/2605.06647v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06647v1</guid><description>Zeyu Yang et al. — arxiv:2605.06647 — LLM Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>StraTA: Incentivizing Agentic Reinforcement Learning with Strategic Trajectory Abstraction</title><link>http://arxiv.org/abs/2605.06642v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06642v1</guid><description>Xiangyuan Xue et al. — arxiv:2605.06642 — LLM Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Recursive Agent Optimization</title><link>http://arxiv.org/abs/2605.06639v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06639v1</guid><description>Apurva Gandhi et al. — arxiv:2605.06639 — LLM Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Cited but Not Verified: Parsing and Evaluating Source Attribution in LLM Deep Research Agents</title><link>http://arxiv.org/abs/2605.06635v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06635v1</guid><description>Hailey Onweller et al. — arxiv:2605.06635 — LLM Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Quantifying Trade-Offs Between Stability and Goal-Obfuscation</title><link>http://arxiv.org/abs/2605.06630v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06630v1</guid><description>Yixuan Wang et al. — arxiv:2605.06630 — LLM Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>MASPO: Joint Prompt Optimization for LLM-based Multi-Agent Systems</title><link>http://arxiv.org/abs/2605.06623v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06623v1</guid><description>Zhexuan Wang et al. — arxiv:2605.06623 — LLM Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>SkillOS: Learning Skill Curation for Self-Evolving Agents</title><link>http://arxiv.org/abs/2605.06614v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06614v1</guid><description>Siru Ouyang et al. — arxiv:2605.06614 — LLM Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>AI CFD Scientist: Toward Open-Ended Computational Fluid Dynamics Discovery with Physics-Aware AI Agents</title><link>http://arxiv.org/abs/2605.06607v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06607v1</guid><description>Nithin Somasekharan et al. — arxiv:2605.06607 — LLM Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Superintelligent Retrieval Agent: The Next Frontier of Information Retrieval</title><link>http://arxiv.org/abs/2605.06647v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06647v1</guid><description>Zeyu Yang et al. — arxiv:2605.06647 — Multi-Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>MASPO: Joint Prompt Optimization for LLM-based Multi-Agent Systems</title><link>http://arxiv.org/abs/2605.06623v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06623v1</guid><description>Zhexuan Wang et al. — arxiv:2605.06623 — Multi-Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>SkillOS: Learning Skill Curation for Self-Evolving Agents</title><link>http://arxiv.org/abs/2605.06614v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06614v1</guid><description>Siru Ouyang et al. — arxiv:2605.06614 — Multi-Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>How Many Iterations to Jailbreak? Dynamic Budget Allocation for Multi-Turn LLM Evaluation</title><link>http://arxiv.org/abs/2605.06605v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06605v1</guid><description>Shai Feldman et al. — arxiv:2605.06605 — Multi-Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Cross-Modal Navigation with Multi-Agent Reinforcement Learning</title><link>http://arxiv.org/abs/2605.06595v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06595v1</guid><description>Shuo Liu et al. — arxiv:2605.06595 — Multi-Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>NeuroAgent: LLM Agents for Multimodal Neuroimaging Analysis and Research</title><link>http://arxiv.org/abs/2605.06584v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06584v1</guid><description>Lujia Zhong et al. — arxiv:2605.06584 — Multi-Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Coordination Matters: Evaluation of Cooperative Multi-Agent Reinforcement Learning</title><link>http://arxiv.org/abs/2605.06557v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06557v1</guid><description>Maria Ana Cardei et al. — arxiv:2605.06557 — Multi-Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>ROSE: Rollout On Serving GPUs via Cooperative Elasticity for Agentic RL</title><link>http://arxiv.org/abs/2605.06534v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06534v1</guid><description>Wei Gao et al. — arxiv:2605.06534 — Multi-Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Agentic AIs Are the Missing Paradigm for Out-of-Distribution Generalization in Foundation Models</title><link>http://arxiv.org/abs/2605.06522v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06522v1</guid><description>Xin Wang et al. — arxiv:2605.06522 — Multi-Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Autonomous Adversary: Red-Teaming in the age of LLM</title><link>http://arxiv.org/abs/2605.06486v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06486v1</guid><description>Mohammad Mamun et al. — arxiv:2605.06486 — Multi-Agent</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Cited but Not Verified: Parsing and Evaluating Source Attribution in LLM Deep Research Agents</title><link>http://arxiv.org/abs/2605.06635v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06635v1</guid><description>Hailey Onweller et al. — arxiv:2605.06635 — RAG</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>How Many Iterations to Jailbreak? Dynamic Budget Allocation for Multi-Turn LLM Evaluation</title><link>http://arxiv.org/abs/2605.06605v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06605v1</guid><description>Shai Feldman et al. — arxiv:2605.06605 — RAG</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>MiA-Signature: Approximating Global Activation for Long-Context Understanding</title><link>http://arxiv.org/abs/2605.06416v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06416v1</guid><description>Yuqing Li et al. — arxiv:2605.06416 — RAG</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>GATHER: Convergence-Centric Hyper-Entity Retrieval for Zero-Shot Cell-Type Annotation</title><link>http://arxiv.org/abs/2605.06403v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06403v1</guid><description>Zhonghui Zhang et al. — arxiv:2605.06403 — RAG</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>LatentRAG: Latent Reasoning and Retrieval for Efficient Agentic RAG</title><link>http://arxiv.org/abs/2605.06285v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06285v1</guid><description>Yijia Zheng et al. — arxiv:2605.06285 — RAG</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>RobotEQ: Transitioning from Passive Intelligence to Active Intelligence in Embodied AI</title><link>http://arxiv.org/abs/2605.06234v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06234v1</guid><description>Kuofei Fang et al. — arxiv:2605.06234 — RAG</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Event-Causal RAG: A Retrieval-Augmented Generation Framework for Long Video Reasoning in Complex Scenarios</title><link>http://arxiv.org/abs/2605.06185v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06185v1</guid><description>Peizheng Yan et al. — arxiv:2605.06185 — RAG</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Retina-RAG: Retrieval-Augmented Vision-Language Modeling for Joint Retinal Diagnosis and Clinical Report Generation</title><link>http://arxiv.org/abs/2605.06173v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06173v1</guid><description>Abdelrahman Zaian et al. — arxiv:2605.06173 — RAG</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>IRC-Bench: Recognizing Entities from Contextual Cues in First-Person Reminiscences</title><link>http://arxiv.org/abs/2605.06142v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06142v1</guid><description>Yehudit Aperstein et al. — arxiv:2605.06142 — RAG</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Tatarstan Toponyms: A Bilingual Dataset and Hybrid RAG System for Geospatial Question Answering</title><link>http://arxiv.org/abs/2605.05962v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05962v1</guid><description>Mullosharaf K. Arabov et al. — arxiv:2605.05962 — RAG</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Can RL Teach Long-Horizon Reasoning to LLMs? Expressiveness Is Key</title><link>http://arxiv.org/abs/2605.06638v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06638v1</guid><description>Tianle Wang et al. — arxiv:2605.06638 — Reasoning</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>SCRuB: Social Concept Reasoning under Rubric-Based Evaluation</title><link>http://arxiv.org/abs/2605.06444v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06444v1</guid><description>Jamelle Watson-Daniels et al. — arxiv:2605.06444 — Reasoning</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>GATHER: Convergence-Centric Hyper-Entity Retrieval for Zero-Shot Cell-Type Annotation</title><link>http://arxiv.org/abs/2605.06403v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06403v1</guid><description>Zhonghui Zhang et al. — arxiv:2605.06403 — Reasoning</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Measuring Black-Box Confidence via Reasoning Trajectories: Geometry, Coverage, and Verbalization</title><link>http://arxiv.org/abs/2605.06308v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06308v1</guid><description>Marc Boubnovski Martell et al. — arxiv:2605.06308 — Reasoning</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Rethinking RL for LLM Reasoning: It&apos;s Sparse Policy Selection, Not Capability Learning</title><link>http://arxiv.org/abs/2605.06241v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06241v1</guid><description>Ömer Faruk Akgül et al. — arxiv:2605.06241 — Reasoning</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>OPSD Compresses What RLVR Teaches: A Post-RL Compaction Stage for Reasoning Models</title><link>http://arxiv.org/abs/2605.06188v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06188v1</guid><description>Jaehoon Kim et al. — arxiv:2605.06188 — Reasoning</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Teaching LLMs Program Semantics via Symbolic Execution Traces</title><link>http://arxiv.org/abs/2605.06184v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06184v1</guid><description>Jonas Bayer et al. — arxiv:2605.06184 — Reasoning</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Pest-Thinker: Learning to Think and Reason like Entomologists via Reinforcement Learning</title><link>http://arxiv.org/abs/2605.06121v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06121v1</guid><description>Xueheng Li et al. — arxiv:2605.06121 — Reasoning</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Policy-Guided Stepwise Model Routing for Cost-Effective Reasoning</title><link>http://arxiv.org/abs/2605.06116v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06116v1</guid><description>Wenwen Si et al. — arxiv:2605.06116 — Reasoning</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Novelty-based Tree-of-Thought Search for LLM Reasoning and Planning</title><link>http://arxiv.org/abs/2605.06040v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06040v1</guid><description>Leon Hamm et al. — arxiv:2605.06040 — Reasoning</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>ROSE: Rollout On Serving GPUs via Cooperative Elasticity for Agentic RL</title><link>http://arxiv.org/abs/2605.06534v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06534v1</guid><description>Wei Gao et al. — arxiv:2605.06534 — Tool Use</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>ReasonSTL: Bridging Natural Language and Signal Temporal Logic via Tool-Augmented Process-Rewarded Learning</title><link>http://arxiv.org/abs/2605.06483v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06483v1</guid><description>Bowen Ye et al. — arxiv:2605.06483 — Tool Use</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>PrefixGuard: From LLM-Agent Traces to Online Failure-Warning Monitors</title><link>http://arxiv.org/abs/2605.06455v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06455v1</guid><description>Xinmiao Huang et al. — arxiv:2605.06455 — Tool Use</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Asymmetric On-Policy Distillation: Bridging Exploitation and Imitation at the Token Level</title><link>http://arxiv.org/abs/2605.06387v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06387v1</guid><description>Nan Jia et al. — arxiv:2605.06387 — Tool Use</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>From Agent Loops to Deterministic Graphs: Execution Lineage for Reproducible AI-Native Work</title><link>http://arxiv.org/abs/2605.06365v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06365v1</guid><description>Josh Rosen et al. — arxiv:2605.06365 — Tool Use</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>MANTRA: Synthesizing SMT-Validated Compliance Benchmarks for Tool-Using LLM Agents</title><link>http://arxiv.org/abs/2605.06334v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06334v1</guid><description>Ashwani Anand et al. — arxiv:2605.06334 — Tool Use</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Teaching Thinking Models to Reason with Tools: A Full-Pipeline Recipe for Tool-Integrated Reasoning</title><link>http://arxiv.org/abs/2605.06326v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06326v1</guid><description>Qianjia Cheng et al. — arxiv:2605.06326 — Tool Use</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Safactory: A Scalable Agent Factory for Trustworthy Autonomous Intelligence</title><link>http://arxiv.org/abs/2605.06230v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06230v1</guid><description>Xinquan Chen et al. — arxiv:2605.06230 — Tool Use</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>DexSynRefine: Synthesizing and Refining Human-Object Interaction Motion for Physically Feasible Dexterous Robot Actions</title><link>http://arxiv.org/abs/2605.05925v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05925v1</guid><description>Hyesung Lee et al. — arxiv:2605.05925 — Tool Use</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>More Is Not Always Better: Cross-Component Interference in LLM Agent Scaffolding</title><link>http://arxiv.org/abs/2605.05716v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05716v1</guid><description>Ming Liu et al. — arxiv:2605.05716 — Tool Use</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Accelerating the Simulation of Ordinary Differential Equations Through Physics-Preserving Neural Networks</title><link>http://arxiv.org/abs/2605.06980v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06980v1</guid><description>Andrew Tagg et al. — arxiv:2605.06980 — Tool Use</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>MedHorizon: Towards Long-context Medical Video Understanding in the Wild</title><link>http://arxiv.org/abs/2605.06537v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06537v1</guid><description>Bodong Du et al. — arxiv:2605.06537 — Multimodal LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>GeoStack: A Framework for Quasi-Abelian Knowledge Composition in VLMs</title><link>http://arxiv.org/abs/2605.06477v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06477v1</guid><description>Pranav Mantini et al. — arxiv:2605.06477 — Multimodal LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>A Regime Theory of Controller Class Selection for LLM Action Decisions</title><link>http://arxiv.org/abs/2605.06339v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06339v1</guid><description>Zhaoyang Jiang et al. — arxiv:2605.06339 — Multimodal LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Toward Visually Realistic Simulation: A Benchmark for Evaluating Robot Manipulation in Simulation</title><link>http://arxiv.org/abs/2605.06311v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06311v1</guid><description>Yixin Zhu et al. — arxiv:2605.06311 — Multimodal LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Towards Annotation-Free Validation of MLLMs: A Vision-Language Logical Consistency Metric</title><link>http://arxiv.org/abs/2605.06201v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06201v1</guid><description>Ying Gu et al. — arxiv:2605.06201 — Multimodal LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Event-Causal RAG: A Retrieval-Augmented Generation Framework for Long Video Reasoning in Complex Scenarios</title><link>http://arxiv.org/abs/2605.06185v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06185v1</guid><description>Peizheng Yan et al. — arxiv:2605.06185 — Multimodal LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>VLA-GSE: Boosting Parameter-Efficient Fine-Tuning in VLA with Generalized and Specialized Experts</title><link>http://arxiv.org/abs/2605.06175v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06175v1</guid><description>Yuhua Jiang et al. — arxiv:2605.06175 — Multimodal LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Retina-RAG: Retrieval-Augmented Vision-Language Modeling for Joint Retinal Diagnosis and Clinical Report Generation</title><link>http://arxiv.org/abs/2605.06173v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06173v1</guid><description>Abdelrahman Zaian et al. — arxiv:2605.06173 — Multimodal LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Pest-Thinker: Learning to Think and Reason like Entomologists via Reinforcement Learning</title><link>http://arxiv.org/abs/2605.06121v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06121v1</guid><description>Xueheng Li et al. — arxiv:2605.06121 — Multimodal LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>CrossCult-KIBench: A Benchmark for Cross-Cultural Knowledge Insertion in MLLMs</title><link>http://arxiv.org/abs/2605.06115v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06115v1</guid><description>Zhen Zeng et al. — arxiv:2605.06115 — Multimodal LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Long Context Pre-Training with Lighthouse Attention</title><link>http://arxiv.org/abs/2605.06554v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06554v1</guid><description>Bowen Peng et al. — arxiv:2605.06554 — Long Context</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>MedHorizon: Towards Long-context Medical Video Understanding in the Wild</title><link>http://arxiv.org/abs/2605.06537v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06537v1</guid><description>Bodong Du et al. — arxiv:2605.06537 — Long Context</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>STALE: Can LLM Agents Know When Their Memories Are No Longer Valid?</title><link>http://arxiv.org/abs/2605.06527v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06527v1</guid><description>Hanxiang Chao et al. — arxiv:2605.06527 — Long Context</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>MiA-Signature: Approximating Global Activation for Long-Context Understanding</title><link>http://arxiv.org/abs/2605.06416v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06416v1</guid><description>Yuqing Li et al. — arxiv:2605.06416 — Long Context</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Don&apos;t Lose Focus: Activation Steering via Key-Orthogonal Projections</title><link>http://arxiv.org/abs/2605.06342v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06342v1</guid><description>Haoyan Luo et al. — arxiv:2605.06342 — Long Context</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>CKT-WAM: Parameter-Efficient Context Knowledge Transfer Between World Action Models</title><link>http://arxiv.org/abs/2605.06247v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06247v1</guid><description>Yuhua Jiang et al. — arxiv:2605.06247 — Long Context</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>UniPrefill: Universal Long-Context Prefill Acceleration via Block-wise Dynamic Sparsification</title><link>http://arxiv.org/abs/2605.06221v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06221v1</guid><description>Qihang Fan et al. — arxiv:2605.06221 — Long Context</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>OPSD Compresses What RLVR Teaches: A Post-RL Compaction Stage for Reasoning Models</title><link>http://arxiv.org/abs/2605.06188v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06188v1</guid><description>Jaehoon Kim et al. — arxiv:2605.06188 — Long Context</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Event-Causal RAG: A Retrieval-Augmented Generation Framework for Long Video Reasoning in Complex Scenarios</title><link>http://arxiv.org/abs/2605.06185v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06185v1</guid><description>Peizheng Yan et al. — arxiv:2605.06185 — Long Context</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>MemReranker: Reasoning-Aware Reranking for Agent Memory Retrieval</title><link>http://arxiv.org/abs/2605.06132v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06132v1</guid><description>Chunyu Li et al. — arxiv:2605.06132 — Long Context</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Optimizer-Model Consistency: Full Finetuning with the Same Optimizer as Pretraining Forgets Less</title><link>http://arxiv.org/abs/2605.06654v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06654v1</guid><description>Yuxing Liu et al. — arxiv:2605.06654 — LLM Efficiency</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>LiVeAction: a Lightweight, Versatile, and Asymmetric Neural Codec Design for Real-time Operation</title><link>http://arxiv.org/abs/2605.06628v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06628v1</guid><description>Dan Jacobellis et al. — arxiv:2605.06628 — LLM Efficiency</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>PairAlign: A Framework for Sequence Tokenization via Self-Alignment with Applications to Audio Tokenization</title><link>http://arxiv.org/abs/2605.06582v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06582v1</guid><description>Adhiraj Banerjee et al. — arxiv:2605.06582 — LLM Efficiency</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>PACZero: PAC-Private Fine-Tuning of Language Models via Sign Quantization</title><link>http://arxiv.org/abs/2605.06505v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06505v1</guid><description>Murat Bilgehan Ertan et al. — arxiv:2605.06505 — LLM Efficiency</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>AgenticPrecoding: LLM-Empowered Multi-Agent System for Precoding Optimization</title><link>http://arxiv.org/abs/2605.06443v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06443v1</guid><description>Zijiu Yang et al. — arxiv:2605.06443 — LLM Efficiency</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Layer Collapse in Diffusion Language Models</title><link>http://arxiv.org/abs/2605.06366v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06366v1</guid><description>Alexander Conzelmann et al. — arxiv:2605.06366 — LLM Efficiency</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Fine-Tuning Small Language Models for Solution-Oriented Windows Event Log Analysis</title><link>http://arxiv.org/abs/2605.06330v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06330v1</guid><description>Siraaj Akhtar et al. — arxiv:2605.06330 — LLM Efficiency</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Taming the Entropy Cliff: Variable Codebook Size Quantization for Autoregressive Visual Generation</title><link>http://arxiv.org/abs/2605.06207v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06207v1</guid><description>Bowen Zheng et al. — arxiv:2605.06207 — LLM Efficiency</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Rethinking Adapter Placement: A Dominant Adaptation Module Perspective</title><link>http://arxiv.org/abs/2605.06183v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06183v1</guid><description>Suoxin Zhang et al. — arxiv:2605.06183 — LLM Efficiency</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>VLA-GSE: Boosting Parameter-Efficient Fine-Tuning in VLA with Generalized and Specialized Experts</title><link>http://arxiv.org/abs/2605.06175v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06175v1</guid><description>Yuhua Jiang et al. — arxiv:2605.06175 — LLM Efficiency</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>On the non-radial oscillations of realistic anisotropic neutron stars: Axial modes</title><link>http://arxiv.org/abs/2605.06418v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06418v1</guid><description>Jose F. Rodriguez-Ruiz et al. — arxiv:2605.06418 — LLM Efficiency</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>A Unified Pair-GRPO Family: From Implicit to Explicit Preference Constraints for Stable and General RL Alignment</title><link>http://arxiv.org/abs/2605.06375v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06375v1</guid><description>Hao Yu et al. — arxiv:2605.06375 — Alignment</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Arena as Offline Reward: Efficient Fine-Grained Preference Optimization for Diffusion Models</title><link>http://arxiv.org/abs/2605.06070v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06070v1</guid><description>Zhikai Li et al. — arxiv:2605.06070 — Alignment</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Fusion in Your Way: Aligning Image Fusion with Heterogeneous Demands via Direct Preference Optimization</title><link>http://arxiv.org/abs/2605.06049v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06049v1</guid><description>Weijian Su et al. — arxiv:2605.06049 — Alignment</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Optimal Transport for LLM Reward Modeling from Noisy Preference</title><link>http://arxiv.org/abs/2605.06036v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06036v1</guid><description>Licheng Pan et al. — arxiv:2605.06036 — Alignment</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>PREFER: Personalized Review Summarization with Online Preference Learning</title><link>http://arxiv.org/abs/2605.05911v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05911v1</guid><description>Millend Roy et al. — arxiv:2605.05911 — Alignment</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>RVPO: Risk-Sensitive Alignment via Variance Regularization</title><link>http://arxiv.org/abs/2605.05750v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05750v1</guid><description>Ivan Montero et al. — arxiv:2605.05750 — Alignment</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Dual-Agent Co-Training for Health Coaching via Implicit Adversarial Preference Optimization</title><link>http://arxiv.org/abs/2605.07011v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07011v1</guid><description>Da Long et al. — arxiv:2605.07011 — Alignment</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>$f$-Divergence Regularized RLHF: Two Tales of Sampling and Unified Analyses</title><link>http://arxiv.org/abs/2605.06977v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06977v1</guid><description>Di Wu et al. — arxiv:2605.06977 — Alignment</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Multi-Objective Constraint Inference using Inverse reinforcement learning</title><link>http://arxiv.org/abs/2605.06951v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06951v1</guid><description>Syed Ihtesham Hussain Shah et al. — arxiv:2605.06951 — Alignment</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Mitigating Cognitive Bias in RLHF by Altering Rationality</title><link>http://arxiv.org/abs/2605.06895v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06895v1</guid><description>Tiffany Horter et al. — arxiv:2605.06895 — Alignment</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>How to Compress KV Cache in RL Post-Training? Shadow Mask Distillation for Memory-Efficient Alignment</title><link>http://arxiv.org/abs/2605.06850v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06850v1</guid><description>Rui Zhu et al. — arxiv:2605.06850 — Alignment</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Cited but Not Verified: Parsing and Evaluating Source Attribution in LLM Deep Research Agents</title><link>http://arxiv.org/abs/2605.06635v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06635v1</guid><description>Hailey Onweller et al. — arxiv:2605.06635 — Hallucination</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>How Many Iterations to Jailbreak? Dynamic Budget Allocation for Multi-Turn LLM Evaluation</title><link>http://arxiv.org/abs/2605.06605v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06605v1</guid><description>Shai Feldman et al. — arxiv:2605.06605 — Hallucination</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Automated Clinical Report Generation for Remote Cognitive Remediation: Comparing Knowledge-Engineered Templates and LLMs in Low-Resource Settings</title><link>http://arxiv.org/abs/2605.06594v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06594v1</guid><description>Yongxin Zhou et al. — arxiv:2605.06594 — Hallucination</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Towards Metric-Faithful Neural Graph Matching</title><link>http://arxiv.org/abs/2605.06588v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06588v1</guid><description>Jyotirmaya Shivottam et al. — arxiv:2605.06588 — Hallucination</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>An algebraic model for rational ultracommutative rings</title><link>http://arxiv.org/abs/2605.06515v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06515v1</guid><description>William Balderrama et al. — arxiv:2605.06515 — Hallucination</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Hyperbolic Concept Bottleneck Models</title><link>http://arxiv.org/abs/2605.06440v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06440v1</guid><description>Daniel Uyterlinde et al. — arxiv:2605.06440 — Hallucination</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>FRInGe: Distribution-Space Integrated Gradients with Fisher--Rao Geometry</title><link>http://arxiv.org/abs/2605.06404v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06404v1</guid><description>Gabriele Martino et al. — arxiv:2605.06404 — Hallucination</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>SwiftI2V: Efficient High-Resolution Image-to-Video Generation via Conditional Segment-wise Generation</title><link>http://arxiv.org/abs/2605.06356v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06356v1</guid><description>YaoYang Liu et al. — arxiv:2605.06356 — Hallucination</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>PACE: Prune-And-Compress Ensemble Models</title><link>http://arxiv.org/abs/2605.06278v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06278v1</guid><description>Fabian Akkerman et al. — arxiv:2605.06278 — Hallucination</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>A Comparative Study of Mass Extraction Schemes and $π^\pm-ρ^\pm$ Mixing</title><link>http://arxiv.org/abs/2605.06271v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06271v1</guid><description>Ziyue Wang et al. — arxiv:2605.06271 — Hallucination</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>When No Benchmark Exists: Validating Comparative LLM Safety Scoring Without Ground-Truth Labels</title><link>http://arxiv.org/abs/2605.06652v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06652v1</guid><description>Sushant Gautam et al. — arxiv:2605.06652 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>How Many Iterations to Jailbreak? Dynamic Budget Allocation for Multi-Turn LLM Evaluation</title><link>http://arxiv.org/abs/2605.06605v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06605v1</guid><description>Shai Feldman et al. — arxiv:2605.06605 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Autonomous Adversary: Red-Teaming in the age of LLM</title><link>http://arxiv.org/abs/2605.06486v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06486v1</guid><description>Mohammad Mamun et al. — arxiv:2605.06486 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Memory Efficient Full-gradient Attacks (MEFA) Framework for Adversarial Defense Evaluations</title><link>http://arxiv.org/abs/2605.06357v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06357v1</guid><description>Yuan Du et al. — arxiv:2605.06357 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Beyond Accuracy: Policy Invariance as a Reliability Test for LLM Safety Judges</title><link>http://arxiv.org/abs/2605.06161v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06161v1</guid><description>Shihao Weng et al. — arxiv:2605.06161 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Lightweight Stylistic Consistency Profiling: Robust Detection of LLM-Generated Textual Content for Multimedia Moderation</title><link>http://arxiv.org/abs/2605.05950v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05950v1</guid><description>Siyuan Li et al. — arxiv:2605.05950 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>LoopTrap: Termination Poisoning Attacks on LLM Agents</title><link>http://arxiv.org/abs/2605.05846v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05846v1</guid><description>Huiyu Xu et al. — arxiv:2605.05846 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Conceal, Reconstruct, Jailbreak: Exploiting the Reconstruction-Concealment Tradeoff in MLLMs</title><link>http://arxiv.org/abs/2605.05709v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05709v1</guid><description>Md Farhamdur Reza et al. — arxiv:2605.05709 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>DataDignity: Training Data Attribution for Large Language Models</title><link>http://arxiv.org/abs/2605.05687v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05687v1</guid><description>Xiaomin Li et al. — arxiv:2605.05687 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>PersonaTeaming: Supporting Persona-Driven Red-Teaming for Generative AI</title><link>http://arxiv.org/abs/2605.05682v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05682v1</guid><description>Wesley Hanwen Deng et al. — arxiv:2605.05682 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>A Systematic Investigation of The RL-Jailbreaker in LLMs</title><link>http://arxiv.org/abs/2605.07032v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07032v1</guid><description>Montaser Mohammedalamen et al. — arxiv:2605.07032 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>MIND: Monge Inception Distance for Generative Models Evaluation</title><link>http://arxiv.org/abs/2605.06797v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06797v1</guid><description>Quentin Berthet et al. — arxiv:2605.06797 — LLM Safety</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Cited but Not Verified: Parsing and Evaluating Source Attribution in LLM Deep Research Agents</title><link>http://arxiv.org/abs/2605.06635v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06635v1</guid><description>Hailey Onweller et al. — arxiv:2605.06635 — LLM Evaluation</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>How Many Iterations to Jailbreak? Dynamic Budget Allocation for Multi-Turn LLM Evaluation</title><link>http://arxiv.org/abs/2605.06605v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06605v1</guid><description>Shai Feldman et al. — arxiv:2605.06605 — LLM Evaluation</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Autonomous Adversary: Red-Teaming in the age of LLM</title><link>http://arxiv.org/abs/2605.06486v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06486v1</guid><description>Mohammad Mamun et al. — arxiv:2605.06486 — LLM Evaluation</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>PrefixGuard: From LLM-Agent Traces to Online Failure-Warning Monitors</title><link>http://arxiv.org/abs/2605.06455v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06455v1</guid><description>Xinmiao Huang et al. — arxiv:2605.06455 — LLM Evaluation</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>SCRuB: Social Concept Reasoning under Rubric-Based Evaluation</title><link>http://arxiv.org/abs/2605.06444v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06444v1</guid><description>Jamelle Watson-Daniels et al. — arxiv:2605.06444 — LLM Evaluation</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>MANTRA: Synthesizing SMT-Validated Compliance Benchmarks for Tool-Using LLM Agents</title><link>http://arxiv.org/abs/2605.06334v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06334v1</guid><description>Ashwani Anand et al. — arxiv:2605.06334 — LLM Evaluation</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Measuring Evaluation-Context Divergence in Open-Weight LLMs: A Paired-Prompt Protocol with Pilot Evidence of Alignment-Pipeline-Specific Heterogeneity</title><link>http://arxiv.org/abs/2605.06327v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06327v1</guid><description>Florian A. D. Burnat et al. — arxiv:2605.06327 — LLM Evaluation</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Quantifying the Statistical Effect of Rubric Modifications on Human-Autorater Agreement</title><link>http://arxiv.org/abs/2605.06283v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06283v1</guid><description>Jessica Huynh et al. — arxiv:2605.06283 — LLM Evaluation</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Joint Consistency: A Unified Test-Time Aggregation Framework via Energy Minimization</title><link>http://arxiv.org/abs/2605.06219v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06219v1</guid><description>Yunzhen Yao et al. — arxiv:2605.06219 — LLM Evaluation</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Beyond Accuracy: Policy Invariance as a Reliability Test for LLM Safety Judges</title><link>http://arxiv.org/abs/2605.06161v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06161v1</guid><description>Shihao Weng et al. — arxiv:2605.06161 — LLM Evaluation</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>To What Extent Does Agent-generated Code Require Maintenance? An Empirical Study</title><link>http://arxiv.org/abs/2605.06464v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06464v1</guid><description>Shota Sawada et al. — arxiv:2605.06464 — Code LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Constraint Decay: The Fragility of LLM Agents in Backend Code Generation</title><link>http://arxiv.org/abs/2605.06445v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06445v1</guid><description>Francesco Dente et al. — arxiv:2605.06445 — Code LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>AgenticPrecoding: LLM-Empowered Multi-Agent System for Precoding Optimization</title><link>http://arxiv.org/abs/2605.06443v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06443v1</guid><description>Zijiu Yang et al. — arxiv:2605.06443 — Code LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Rethinking Adapter Placement: A Dominant Adaptation Module Perspective</title><link>http://arxiv.org/abs/2605.06183v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06183v1</guid><description>Suoxin Zhang et al. — arxiv:2605.06183 — Code LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Schedule-and-Calibrate: Utility-Guided Multi-Task Reinforcement Learning for Code LLMs</title><link>http://arxiv.org/abs/2605.06111v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06111v1</guid><description>Yujia Chen et al. — arxiv:2605.06111 — Code LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>FalconGEMM: Surpassing Hardware Peaks with Lower-Complexity Matrix Multiplication</title><link>http://arxiv.org/abs/2605.06057v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06057v1</guid><description>Honglin Zhu et al. — arxiv:2605.06057 — Code LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Evaluating Non-English Developer Support in Machine Learning for Software Engineering</title><link>http://arxiv.org/abs/2605.05902v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05902v1</guid><description>Jonathan Katzy et al. — arxiv:2605.05902 — Code LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>On Fixing Insecure AI-Generated Code through Model Fine-Tuning and Prompting Strategies</title><link>http://arxiv.org/abs/2605.05867v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05867v1</guid><description>Ali Soltanian Fard Jahromi et al. — arxiv:2605.05867 — Code LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>CircuitFormer: A Circuit Language Model for Analog Topology Design from Natural Language Prompt</title><link>http://arxiv.org/abs/2605.05773v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05773v1</guid><description>Md Touhidul Islam et al. — arxiv:2605.05773 — Code LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Retrieval-Conditioned Topology Selection with Provable Budget Conservation for Multi-Agent Code Generation</title><link>http://arxiv.org/abs/2605.05657v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05657v1</guid><description>Abhijit Talluri et al. — arxiv:2605.05657 — Code LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Delulu: A Verified Multi-Lingual Benchmark for Code Hallucination Detection in Fill-in-the-Middle Tasks</title><link>http://arxiv.org/abs/2605.07024v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07024v1</guid><description>Mahdi Erfanian et al. — arxiv:2605.07024 — Code LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Minimizing Modality Gap from the Input Side: Your Speech LLM Can Be a Prosody-Aware Text LLM</title><link>http://arxiv.org/abs/2605.05927v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05927v2</guid><description>Wenqian Cui et al. — arxiv:2605.05927 — Speech LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Speech LLM</category></item><item><title>VITA-QinYu: Expressive Spoken Language Model for Role-Playing and Singing</title><link>http://arxiv.org/abs/2605.06765v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06765v1</guid><description>Jiacheng Xu et al. — arxiv:2605.06765 — Speech LLM</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Speech LLM</category></item><item><title>Systematic Evaluation of Large Language Models for Post-Discharge Clinical Action Extraction</title><link>http://arxiv.org/abs/2605.06191v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06191v1</guid><description>Shivali Dalmia et al. — arxiv:2605.06191 — Medical NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Medical NLP</category></item><item><title>Decodable but Not Corrected by Fixed Residual-Stream Linear Steering: Evidence from Medical LLM Failure Regimes</title><link>http://arxiv.org/abs/2605.05715v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05715v1</guid><description>Ming Liu et al. — arxiv:2605.05715 — Medical NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Medical NLP</category></item><item><title>In Data or Invisible: Toward a Better Digital Representation of Low-Resource Languages with Knowledge Graphs</title><link>http://arxiv.org/abs/2605.05931v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05931v1</guid><description>Ndeye-Emilie Mbengue et al. — arxiv:2605.05931 — Multilingual NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Which Are the Low-Resource Languages of the Semantic Web?</title><link>http://arxiv.org/abs/2605.05929v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05929v1</guid><description>Ndeye-Emilie Mbengue et al. — arxiv:2605.05929 — Multilingual NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Understanding Cross-Language Transfer Improvements in Low-Resource HTR: The Role of Sequence Modeling</title><link>http://arxiv.org/abs/2605.05900v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05900v1</guid><description>Sana Al-azzawi et al. — arxiv:2605.05900 — Multilingual NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>X-Voice: Enabling Everyone to Speak 30 Languages via Zero-Shot Cross-Lingual Voice Cloning</title><link>http://arxiv.org/abs/2605.05611v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05611v1</guid><description>Rixi Xu et al. — arxiv:2605.05611 — Multilingual NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Delulu: A Verified Multi-Lingual Benchmark for Code Hallucination Detection in Fill-in-the-Middle Tasks</title><link>http://arxiv.org/abs/2605.07024v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07024v1</guid><description>Mahdi Erfanian et al. — arxiv:2605.07024 — Multilingual NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>MultiSoc-4D: A Benchmark for Diagnosing Instruction-Induced Label Collapse in Closed-Set LLM Annotation of Bengali Social Media</title><link>http://arxiv.org/abs/2605.06940v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06940v1</guid><description>Souvik Pramanik et al. — arxiv:2605.06940 — Multilingual NLP</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>IRC-Bench: Recognizing Entities from Contextual Cues in First-Person Reminiscences</title><link>http://arxiv.org/abs/2605.06142v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06142v1</guid><description>Yehudit Aperstein et al. — arxiv:2605.06142 — Named Entity Recognition</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Named Entity Recognition</category></item><item><title>Inductive Power Grid Cascading Failure Analysis with GRU-Gated Graph Attention</title><link>http://arxiv.org/abs/2605.07010v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.07010v1</guid><description>Tianxin Zhou et al. — arxiv:2605.07010 — Information Extraction</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>Superintelligent Retrieval Agent: The Next Frontier of Information Retrieval</title><link>http://arxiv.org/abs/2605.06647v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06647v1</guid><description>Zeyu Yang et al. — arxiv:2605.06647 — Question Answering</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Task-Aware Answer Preservation under Audio Compression for Large Audio Language Models</title><link>http://arxiv.org/abs/2605.06631v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06631v1</guid><description>Amir Ivry et al. — arxiv:2605.06631 — Question Answering</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Rethinking Vacuity for OOD Detection in Evidential Deep Learning</title><link>http://arxiv.org/abs/2605.06382v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06382v1</guid><description>Claire McNamara et al. — arxiv:2605.06382 — Question Answering</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>LatentRAG: Latent Reasoning and Retrieval for Efficient Agentic RAG</title><link>http://arxiv.org/abs/2605.06285v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06285v1</guid><description>Yijia Zheng et al. — arxiv:2605.06285 — Question Answering</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Towards Self-Explainable Document Visual Question Answering with Chain-of-Explanation Predictions</title><link>http://arxiv.org/abs/2605.06058v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06058v1</guid><description>Kjetil Indrehus et al. — arxiv:2605.06058 — Question Answering</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Multi-agent decision making: A Blackwell&apos;s informativeness approach</title><link>http://arxiv.org/abs/2605.06028v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06028v1</guid><description>Zheng Zhang et al. — arxiv:2605.06028 — Question Answering</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Training Transformers for KV Cache Compressibility</title><link>http://arxiv.org/abs/2605.05971v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05971v1</guid><description>Yoav Gelberg et al. — arxiv:2605.05971 — Question Answering</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Tatarstan Toponyms: A Bilingual Dataset and Hybrid RAG System for Geospatial Question Answering</title><link>http://arxiv.org/abs/2605.05962v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05962v1</guid><description>Mullosharaf K. Arabov et al. — arxiv:2605.05962 — Question Answering</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Hallucination as an Anomaly: Dynamic Intervention via Probabilistic Circuits</title><link>http://arxiv.org/abs/2605.05953v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05953v1</guid><description>Erik Nielsen et al. — arxiv:2605.05953 — Question Answering</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>ICU-Bench:Benchmarking Continual Unlearning in Multimodal Large Language Models</title><link>http://arxiv.org/abs/2605.05938v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05938v1</guid><description>Yuhang Wang et al. — arxiv:2605.05938 — Question Answering</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Are We Making Progress in Multimodal Domain Generalization? A Comprehensive Benchmark Study</title><link>http://arxiv.org/abs/2605.06643v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06643v1</guid><description>Hao Dong et al. — arxiv:2605.06643 — Sentiment Analysis</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>AffectGPT-RL: Revealing Roles of Reinforcement Learning in Open-Vocabulary Emotion Recognition</title><link>http://arxiv.org/abs/2605.06126v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06126v1</guid><description>Zheng Lian et al. — arxiv:2605.06126 — Sentiment Analysis</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Knowledge Graphs, the Missing Link in Agentic AI-based Formal Verification</title><link>http://arxiv.org/abs/2605.06434v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06434v1</guid><description>Vaisakh Naduvodi Viswambharan et al. — arxiv:2605.06434 — Knowledge Graph</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>GATHER: Convergence-Centric Hyper-Entity Retrieval for Zero-Shot Cell-Type Annotation</title><link>http://arxiv.org/abs/2605.06403v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06403v1</guid><description>Zhonghui Zhang et al. — arxiv:2605.06403 — Knowledge Graph</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Event-Causal RAG: A Retrieval-Augmented Generation Framework for Long Video Reasoning in Complex Scenarios</title><link>http://arxiv.org/abs/2605.06185v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06185v1</guid><description>Peizheng Yan et al. — arxiv:2605.06185 — Knowledge Graph</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Graphlets as Building Blocks for Structural Vocabulary in Knowledge Graph Foundation Models</title><link>http://arxiv.org/abs/2605.06154v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.06154v1</guid><description>Kossi Amouzouvi et al. — arxiv:2605.06154 — Knowledge Graph</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>In Data or Invisible: Toward a Better Digital Representation of Low-Resource Languages with Knowledge Graphs</title><link>http://arxiv.org/abs/2605.05931v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05931v1</guid><description>Ndeye-Emilie Mbengue et al. — arxiv:2605.05931 — Knowledge Graph</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Which Are the Low-Resource Languages of the Semantic Web?</title><link>http://arxiv.org/abs/2605.05929v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05929v1</guid><description>Ndeye-Emilie Mbengue et al. — arxiv:2605.05929 — Knowledge Graph</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Knowledge-Graph Paths as Intermediate Supervision for Self-Evolving Search Agents</title><link>http://arxiv.org/abs/2605.05702v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05702v1</guid><description>Huyu Wu et al. — arxiv:2605.05702 — Knowledge Graph</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>SPARK: Self-Play with Asymmetric Reward from Knowledge Graphs</title><link>http://arxiv.org/abs/2605.05546v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05546v1</guid><description>Hyobin Park et al. — arxiv:2605.05546 — Knowledge Graph</description><pubDate>Thu, 07 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Superposition Is Not Necessary: A Mechanistic Interpretability Analysis of Transformer Representations for Time Series Forecasting</title><link>http://arxiv.org/abs/2605.05151v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05151v1</guid><description>Alper Yıldırım et al. — arxiv:2605.05151 — NLP</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>TabEmbed: Benchmarking and Learning Generalist Embeddings for Tabular Understanding</title><link>http://arxiv.org/abs/2605.04962v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04962v1</guid><description>Minjie Qiang et al. — arxiv:2605.04962 — NLP</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Measuring Psychological States Through Semantic Projection: A Theory-Driven Approach to Language-Based Assessment</title><link>http://arxiv.org/abs/2605.04873v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04873v1</guid><description>Maria Luongo et al. — arxiv:2605.04873 — NLP</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Distributed Energy System Design including Unbalanced AC Power Flow for Large LV Networks with ADMM</title><link>http://arxiv.org/abs/2605.04746v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04746v1</guid><description>Robert Steven et al. — arxiv:2605.04746 — NLP</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>TajikNLP: An Open-Source Toolkit for Comprehensive Text Processing of Tajik (Cyrillic Script)</title><link>http://arxiv.org/abs/2605.04583v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04583v1</guid><description>Mullosharaf K. Arabov et al. — arxiv:2605.04583 — NLP</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>A Hybrid Method for Low-Resource Named Entity Recognition</title><link>http://arxiv.org/abs/2605.04489v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04489v1</guid><description>Do Minh Duc et al. — arxiv:2605.04489 — NLP</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Robustness of Graph Self-Supervised Learning to Real-World Noise: A Case Study on Text-Driven Biomedical Graphs</title><link>http://arxiv.org/abs/2605.05463v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05463v1</guid><description>Othmane Kabal et al. — arxiv:2605.05463 — NLP</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>D-OPSD: On-Policy Self-Distillation for Continuously Tuning Step-Distilled Diffusion Models</title><link>http://arxiv.org/abs/2605.05204v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05204v1</guid><description>Dengyang Jiang et al. — arxiv:2605.05204 — LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Almost-Orthogonality in Lp Spaces: A Case Study with Grok</title><link>http://arxiv.org/abs/2605.05192v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05192v1</guid><description>Ziang Chen et al. — arxiv:2605.05192 — LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>MRI-Eval: A Tiered Benchmark for Evaluating LLM Performance on MRI Physics and GE Scanner Operations Knowledge</title><link>http://arxiv.org/abs/2605.05175v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05175v1</guid><description>Perry E. Radau et al. — arxiv:2605.05175 — LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Design Conductor 2.0: An agent builds a TurboQuant inference accelerator in 80 hours</title><link>http://arxiv.org/abs/2605.05170v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05170v1</guid><description>The Verkor Team et al. — arxiv:2605.05170 — LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>PSK at SemEval-2026 Task 9: Multilingual Polarization Detection Using Ensemble Gemma Models with Synthetic Data Augmentation</title><link>http://arxiv.org/abs/2605.05159v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05159v1</guid><description>Srikar Kashyap Pulipaka et al. — arxiv:2605.05159 — LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Low-Cost Black-Box Detection of LLM Hallucinations via Dynamical System Prediction</title><link>http://arxiv.org/abs/2605.05134v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05134v1</guid><description>Dan Wilson et al. — arxiv:2605.05134 — LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Joint Treatment Effect Estimation from Incomplete Healthcare Data: Temporal Causal Normalizing Flows with LLM-driven Evolutionary MNAR Imputation</title><link>http://arxiv.org/abs/2605.05125v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05125v1</guid><description>Olivia Jullian Parra et al. — arxiv:2605.05125 — LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Beyond Semantics: An Evidential Reasoning-Aware Multi-View Learning Framework for Trustworthy Mental Health Prediction</title><link>http://arxiv.org/abs/2605.05121v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05121v1</guid><description>Yucheng Ruan et al. — arxiv:2605.05121 — LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>On the Hardness of Junking LLMs</title><link>http://arxiv.org/abs/2605.05116v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05116v1</guid><description>Marco Rando et al. — arxiv:2605.05116 — LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Text Corpora as Concept Fields: Black-Box Hallucination and Novelty Measurement</title><link>http://arxiv.org/abs/2605.05103v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05103v1</guid><description>Nicholas S. Kersting et al. — arxiv:2605.05103 — LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>LongSeeker: Elastic Context Orchestration for Long-Horizon Search Agents</title><link>http://arxiv.org/abs/2605.05191v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05191v1</guid><description>Yijun Lu et al. — arxiv:2605.05191 — LLM Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>OpenSearch-VL: An Open Recipe for Frontier Multimodal Search Agents</title><link>http://arxiv.org/abs/2605.05185v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05185v1</guid><description>Shuang Chen et al. — arxiv:2605.05185 — LLM Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Design Conductor 2.0: An agent builds a TurboQuant inference accelerator in 80 hours</title><link>http://arxiv.org/abs/2605.05170v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05170v1</guid><description>The Verkor Team et al. — arxiv:2605.05170 — LLM Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>PhysForge: Generating Physics-Grounded 3D Assets for Interactive Virtual World</title><link>http://arxiv.org/abs/2605.05163v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05163v1</guid><description>Yunhan Yang et al. — arxiv:2605.05163 — LLM Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Local and global optimization in Parallel Minority Games</title><link>http://arxiv.org/abs/2605.05141v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05141v1</guid><description>Soumyajyoti Biswas et al. — arxiv:2605.05141 — LLM Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Executable World Models for ARC-AGI-3 in the Era of Coding Agents</title><link>http://arxiv.org/abs/2605.05138v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05138v1</guid><description>Sergey Rodionov et al. — arxiv:2605.05138 — LLM Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>The Demand Externality of Automation</title><link>http://arxiv.org/abs/2605.05127v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05127v1</guid><description>Erhan Bayraktar et al. — arxiv:2605.05127 — LLM Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Rollout Pass-Rate Control: Steering Binary-Reward RL Toward Its Most Informative Regime</title><link>http://arxiv.org/abs/2605.05112v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05112v1</guid><description>Tianshu Zhu et al. — arxiv:2605.05112 — LLM Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Graph-SND: Sparse Aggregation for Behavioral Diversity in Multi-Agent Reinforcement Learning</title><link>http://arxiv.org/abs/2605.05020v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05020v1</guid><description>Shawn Ray et al. — arxiv:2605.05020 — LLM Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Uno-Orchestra: Parsimonious Agent Routing via Selective Delegation</title><link>http://arxiv.org/abs/2605.05007v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05007v1</guid><description>Zhiqing Cui et al. — arxiv:2605.05007 — LLM Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>OpenSearch-VL: An Open Recipe for Frontier Multimodal Search Agents</title><link>http://arxiv.org/abs/2605.05185v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05185v1</guid><description>Shuang Chen et al. — arxiv:2605.05185 — Multi-Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Design Conductor 2.0: An agent builds a TurboQuant inference accelerator in 80 hours</title><link>http://arxiv.org/abs/2605.05170v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05170v1</guid><description>The Verkor Team et al. — arxiv:2605.05170 — Multi-Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Graph-SND: Sparse Aggregation for Behavioral Diversity in Multi-Agent Reinforcement Learning</title><link>http://arxiv.org/abs/2605.05020v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05020v1</guid><description>Shawn Ray et al. — arxiv:2605.05020 — Multi-Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Uno-Orchestra: Parsimonious Agent Routing via Selective Delegation</title><link>http://arxiv.org/abs/2605.05007v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05007v1</guid><description>Zhiqing Cui et al. — arxiv:2605.05007 — Multi-Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Modular Reinforcement Learning For Cooperative Swarms</title><link>http://arxiv.org/abs/2605.04939v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04939v1</guid><description>Erel Shtossel et al. — arxiv:2605.04939 — Multi-Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Evolving Idea Graphs with Learnable Edits-and-Commits for Multi-Agent Scientific Ideation</title><link>http://arxiv.org/abs/2605.04922v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04922v1</guid><description>Jiangwen Dong et al. — arxiv:2605.04922 — Multi-Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Strat-Reasoner: Reinforcing Strategic Reasoning of LLMs in Multi-Agent Games</title><link>http://arxiv.org/abs/2605.04906v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04906v1</guid><description>Yidong He et al. — arxiv:2605.04906 — Multi-Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Storage Is Not Memory: A Retrieval-Centered Architecture for Agent Recall</title><link>http://arxiv.org/abs/2605.04897v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04897v1</guid><description>Joshua Adler et al. — arxiv:2605.04897 — Multi-Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Agentic Repository Mining: A Multi-Task Evaluation</title><link>http://arxiv.org/abs/2605.04845v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04845v1</guid><description>Johannes Härtel et al. — arxiv:2605.04845 — Multi-Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Tree-based Credit Assignment for Multi-Agent Memory System</title><link>http://arxiv.org/abs/2605.04811v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04811v1</guid><description>Marina Mao et al. — arxiv:2605.04811 — Multi-Agent</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>How Does Chunking Affect Retrieval-Augmented Code Completion? A Controlled Empirical Study</title><link>http://arxiv.org/abs/2605.04763v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04763v1</guid><description>Xinjian Wu et al. — arxiv:2605.04763 — RAG</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Graph-Augmented LLMs for Swiss MP Ideology Prediction</title><link>http://arxiv.org/abs/2605.04643v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04643v1</guid><description>Yifei Yuan et al. — arxiv:2605.04643 — RAG</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>CAR: Query-Guided Confidence-Aware Reranking for Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.04495v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04495v1</guid><description>Zhipeng Song et al. — arxiv:2605.04495 — RAG</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>DoGMaTiQ: Automated Generation of Question-and-Answer Nuggets for Report Evaluation</title><link>http://arxiv.org/abs/2605.04458v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04458v1</guid><description>Bryan Li et al. — arxiv:2605.04458 — RAG</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>EP-GRPO: Entropy-Progress Aligned Group Relative Policy Optimization with Implicit Process Guidance</title><link>http://arxiv.org/abs/2605.04960v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04960v1</guid><description>Song Yu et al. — arxiv:2605.04960 — Reasoning</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Strat-Reasoner: Reinforcing Strategic Reasoning of LLMs in Multi-Agent Games</title><link>http://arxiv.org/abs/2605.04906v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04906v1</guid><description>Yidong He et al. — arxiv:2605.04906 — Reasoning</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>VocalParse: Towards Unified and Scalable Singing Voice Transcription with Large Audio Language Models</title><link>http://arxiv.org/abs/2605.04613v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04613v1</guid><description>Yukun Chen et al. — arxiv:2605.04613 — Reasoning</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Pen-Strategist: A Reasoning Framework for Penetration Testing Strategy Formation and Analysis</title><link>http://arxiv.org/abs/2605.04499v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04499v1</guid><description>Yasod Ginige et al. — arxiv:2605.04499 — Reasoning</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>LongSeeker: Elastic Context Orchestration for Long-Horizon Search Agents</title><link>http://arxiv.org/abs/2605.05191v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05191v1</guid><description>Yijun Lu et al. — arxiv:2605.05191 — Tool Use</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Preference-Based Self-Distillation: Beyond KL Matching via Reward Regularization</title><link>http://arxiv.org/abs/2605.05040v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05040v1</guid><description>Xin Yu et al. — arxiv:2605.05040 — Tool Use</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Uno-Orchestra: Parsimonious Agent Routing via Selective Delegation</title><link>http://arxiv.org/abs/2605.05007v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05007v1</guid><description>Zhiqing Cui et al. — arxiv:2605.05007 — Tool Use</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>AgentTrust: Runtime Safety Evaluation and Interception for AI Agent Tool Use</title><link>http://arxiv.org/abs/2605.04785v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04785v1</guid><description>Chenglin Yang et al. — arxiv:2605.04785 — Tool Use</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>D-OPSD: On-Policy Self-Distillation for Continuously Tuning Step-Distilled Diffusion Models</title><link>http://arxiv.org/abs/2605.05204v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05204v1</guid><description>Dengyang Jiang et al. — arxiv:2605.05204 — Multimodal LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>PhysForge: Generating Physics-Grounded 3D Assets for Interactive Virtual World</title><link>http://arxiv.org/abs/2605.05163v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05163v1</guid><description>Yunhan Yang et al. — arxiv:2605.05163 — Multimodal LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Wasserstein-Aligned Localisation for VLM-Based Distributional OOD Detection in Medical Imaging</title><link>http://arxiv.org/abs/2605.05161v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05161v1</guid><description>Bernhard Kainz et al. — arxiv:2605.05161 — Multimodal LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Direct Product Flow Matching: Decoupling Radial and Angular Dynamics for Few-Shot Adaptation</title><link>http://arxiv.org/abs/2605.05054v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05054v1</guid><description>Hongxu Chen et al. — arxiv:2605.05054 — Multimodal LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>When Relations Break: Analyzing Relation Hallucination in Vision-Language Model Under Rotation and Noise</title><link>http://arxiv.org/abs/2605.05045v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05045v1</guid><description>Philip Wootaek Shin et al. — arxiv:2605.05045 — Multimodal LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Prompt-Anchored Vision-Text Distillation for Lifelong Person Re-identification</title><link>http://arxiv.org/abs/2605.05027v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05027v1</guid><description>Wen Wen et al. — arxiv:2605.05027 — Multimodal LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>FairEnc: A Fair Vision-Language Model with Fair Vision and Text Encoders for Glaucoma Detection</title><link>http://arxiv.org/abs/2605.04882v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04882v1</guid><description>Mohamed Elhabebe et al. — arxiv:2605.04882 — Multimodal LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Uncertainty-Aware Exploratory Direct Preference Optimization for Multimodal Large Language Models</title><link>http://arxiv.org/abs/2605.04874v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04874v1</guid><description>Huatian Zhang et al. — arxiv:2605.04874 — Multimodal LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Reward-Decomposed Reinforcement Learning for Immersive Video Role-Playing</title><link>http://arxiv.org/abs/2605.04733v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04733v1</guid><description>Miao Wang et al. — arxiv:2605.04733 — Multimodal LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Anny-Fit: All-Age Human Mesh Recovery</title><link>http://arxiv.org/abs/2605.04728v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04728v1</guid><description>Laura Bravo-Sánchez et al. — arxiv:2605.04728 — Multimodal LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Stable homotopy theory of higher categories</title><link>http://arxiv.org/abs/2605.05195v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05195v1</guid><description>Hadrian Heine et al. — arxiv:2605.05195 — Long Context</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>LongSeeker: Elastic Context Orchestration for Long-Horizon Search Agents</title><link>http://arxiv.org/abs/2605.05191v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05191v1</guid><description>Yijun Lu et al. — arxiv:2605.05191 — Long Context</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Driver-WM: A Driver-Centric Traffic-Conditioned Latent World Model for In-Cabin Dynamics Rollout</title><link>http://arxiv.org/abs/2605.05092v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05092v1</guid><description>Haozhuang Chi et al. — arxiv:2605.05092 — Long Context</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>The Impossibility Triangle of Long-Context Modeling</title><link>http://arxiv.org/abs/2605.05066v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05066v1</guid><description>Yan Zhou et al. — arxiv:2605.05066 — Long Context</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Uno-Orchestra: Parsimonious Agent Routing via Selective Delegation</title><link>http://arxiv.org/abs/2605.05007v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05007v1</guid><description>Zhiqing Cui et al. — arxiv:2605.05007 — Long Context</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>A meta-analysis of the effect of generative AI on productivity and learning in programming</title><link>http://arxiv.org/abs/2605.04779v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04779v1</guid><description>Sebastian Maier et al. — arxiv:2605.04779 — Long Context</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Spectro-Polarimetric Observations of TeV Sources (SPOTS): First results</title><link>http://arxiv.org/abs/2605.04619v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04619v1</guid><description>J. Barnard et al. — arxiv:2605.04619 — Long Context</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>SCOUT: Active Information Foraging for Long-Text Understanding with Decoupled Epistemic States</title><link>http://arxiv.org/abs/2605.04496v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04496v1</guid><description>Zhenliang Zhang et al. — arxiv:2605.04496 — Long Context</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Stream-T1: Test-Time Scaling for Streaming Video Generation</title><link>http://arxiv.org/abs/2605.04461v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04461v1</guid><description>Yijing Tu et al. — arxiv:2605.04461 — Long Context</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>How Do Ice Shelves Calve? Peridynamic Modeling of Ice Shelf Fracture Driven by Wave Erosion, Basal Melting, and Buoyancy Flexure</title><link>http://arxiv.org/abs/2605.04365v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04365v1</guid><description>Ying Song et al. — arxiv:2605.04365 — Long Context</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Dynamical correlations in a dissipative XXZ spin chain</title><link>http://arxiv.org/abs/2605.05162v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05162v1</guid><description>Cătălin Paşcu Moca et al. — arxiv:2605.05162 — LLM Efficiency</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>PSK at SemEval-2026 Task 9: Multilingual Polarization Detection Using Ensemble Gemma Models with Synthetic Data Augmentation</title><link>http://arxiv.org/abs/2605.05159v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05159v1</guid><description>Srikar Kashyap Pulipaka et al. — arxiv:2605.05159 — LLM Efficiency</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Quantum Entanglement in the Dirac Field Quantization around Charged Black Holes</title><link>http://arxiv.org/abs/2605.05143v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05143v1</guid><description>Abdessamie Chhieb et al. — arxiv:2605.05143 — LLM Efficiency</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>CapsID: Soft-Routed Variable-Length Semantic IDs for Generative Recommendation</title><link>http://arxiv.org/abs/2605.05096v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05096v1</guid><description>Wenzhuo Cheng et al. — arxiv:2605.05096 — LLM Efficiency</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Quantized Probabilistic AI for Gear Fault Diagnosis in Motor Drives</title><link>http://arxiv.org/abs/2605.05032v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05032v1</guid><description>Subham Sahoo et al. — arxiv:2605.05032 — LLM Efficiency</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>You Snooze, You Lose: Automatic Safety Alignment Restoration through Neural Weight Translation</title><link>http://arxiv.org/abs/2605.04992v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04992v1</guid><description>Marco Arazzi et al. — arxiv:2605.04992 — LLM Efficiency</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Low-Rank Adaptation of Geospatial Foundation Models for Wildfire Mapping Using Sentinel-2 Data</title><link>http://arxiv.org/abs/2605.04989v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04989v1</guid><description>Ali Shibli et al. — arxiv:2605.04989 — LLM Efficiency</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>KernelBench-X: A Comprehensive Benchmark for Evaluating LLM-Generated GPU Kernels</title><link>http://arxiv.org/abs/2605.04956v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04956v1</guid><description>Han Wang et al. — arxiv:2605.04956 — LLM Efficiency</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Adaptive Inverted-Index Routing for Granular Mixtures-of-Experts</title><link>http://arxiv.org/abs/2605.04952v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04952v1</guid><description>Klaus-Rudolf Kladny et al. — arxiv:2605.04952 — LLM Efficiency</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Adapting Large Language Models to a Low-Resource Agglutinative Language: A Comparative Study of LoRA and QLoRA for Bashkir</title><link>http://arxiv.org/abs/2605.04948v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04948v1</guid><description>Mullosharaf K. Arabov et al. — arxiv:2605.04948 — LLM Efficiency</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Preference-Based Self-Distillation: Beyond KL Matching via Reward Regularization</title><link>http://arxiv.org/abs/2605.05040v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05040v1</guid><description>Xin Yu et al. — arxiv:2605.05040 — Alignment</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Uncertainty-Aware Exploratory Direct Preference Optimization for Multimodal Large Language Models</title><link>http://arxiv.org/abs/2605.04874v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04874v1</guid><description>Huatian Zhang et al. — arxiv:2605.04874 — Alignment</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>RLearner-LLM: Balancing Logical Grounding and Fluency in Large Language Models via Hybrid Direct Preference Optimization</title><link>http://arxiv.org/abs/2605.04539v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04539v1</guid><description>Qiming Bao et al. — arxiv:2605.04539 — Alignment</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Towards General Preference Alignment: Diffusion Models at Nash Equilibrium</title><link>http://arxiv.org/abs/2605.04494v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04494v1</guid><description>Jiaming Hu et al. — arxiv:2605.04494 — Alignment</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Data-dependent Exploration for Online Reinforcement Learning from Human Feedback</title><link>http://arxiv.org/abs/2605.04477v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04477v1</guid><description>Zhen-Yu Zhang et al. — arxiv:2605.04477 — Alignment</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>LongSeeker: Elastic Context Orchestration for Long-Horizon Search Agents</title><link>http://arxiv.org/abs/2605.05191v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05191v1</guid><description>Yijun Lu et al. — arxiv:2605.05191 — Hallucination</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>The First Token Knows: Single-Decode Confidence for Hallucination Detection</title><link>http://arxiv.org/abs/2605.05166v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05166v1</guid><description>Mina Gabriel et al. — arxiv:2605.05166 — Hallucination</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Low-Cost Black-Box Detection of LLM Hallucinations via Dynamical System Prediction</title><link>http://arxiv.org/abs/2605.05134v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05134v1</guid><description>Dan Wilson et al. — arxiv:2605.05134 — Hallucination</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Text Corpora as Concept Fields: Black-Box Hallucination and Novelty Measurement</title><link>http://arxiv.org/abs/2605.05103v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05103v1</guid><description>Nicholas S. Kersting et al. — arxiv:2605.05103 — Hallucination</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Automatically Finding and Validating Unexpected Side-Effects of Interventions on Language Models</title><link>http://arxiv.org/abs/2605.05090v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05090v1</guid><description>Quintin Pope et al. — arxiv:2605.05090 — Hallucination</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>When Relations Break: Analyzing Relation Hallucination in Vision-Language Model Under Rotation and Noise</title><link>http://arxiv.org/abs/2605.05045v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05045v1</guid><description>Philip Wootaek Shin et al. — arxiv:2605.05045 — Hallucination</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Local Intrinsic Dimension Unveils Hallucinations in Diffusion Models</title><link>http://arxiv.org/abs/2605.05026v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05026v1</guid><description>Bartlomiej Sobieski et al. — arxiv:2605.05026 — Hallucination</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Detecting Hallucinations in Large Language Models via Internal Attention Divergence Signals</title><link>http://arxiv.org/abs/2605.05025v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05025v1</guid><description>Gijs van Dijk et al. — arxiv:2605.05025 — Hallucination</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Misaligned by Reward: Socially Undesirable Preferences in LLMs</title><link>http://arxiv.org/abs/2605.05003v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05003v1</guid><description>Gayane Ghazaryan et al. — arxiv:2605.05003 — Hallucination</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Self-Attention as Transport: Limits of Symmetric Spectral Diagnostics</title><link>http://arxiv.org/abs/2605.04893v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04893v1</guid><description>Dominik Dahlem et al. — arxiv:2605.04893 — Hallucination</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>On the Hardness of Junking LLMs</title><link>http://arxiv.org/abs/2605.05116v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05116v1</guid><description>Marco Rando et al. — arxiv:2605.05116 — LLM Safety</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>SoK: Robustness in Large Language Models against Jailbreak Attacks</title><link>http://arxiv.org/abs/2605.05058v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05058v1</guid><description>Feiyue Xu et al. — arxiv:2605.05058 — LLM Safety</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>DecodingTrust-Agent Platform (DTap): A Controllable and Interactive Red-Teaming Platform for AI Agents</title><link>http://arxiv.org/abs/2605.04808v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04808v1</guid><description>Zhaorun Chen et al. — arxiv:2605.04808 — LLM Safety</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Sparse Tokens Suffice: Jailbreaking Audio Language Models via Token-Aware Gradient Optimization</title><link>http://arxiv.org/abs/2605.04700v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04700v1</guid><description>Zheng Fang et al. — arxiv:2605.04700 — LLM Safety</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Physical Adversarial Clothing Evades Visible-Thermal Detectors via Non-Overlapping RGB-T Pattern</title><link>http://arxiv.org/abs/2605.04675v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04675v1</guid><description>Xiaopei Zhu et al. — arxiv:2605.04675 — LLM Safety</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Dissociating spatial frequency reliance from adversarial robustness advantages in neurally guided deep convolutional neural networks</title><link>http://arxiv.org/abs/2605.04443v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04443v1</guid><description>Zhenan Shao et al. — arxiv:2605.04443 — LLM Safety</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>MRI-Eval: A Tiered Benchmark for Evaluating LLM Performance on MRI Physics and GE Scanner Operations Knowledge</title><link>http://arxiv.org/abs/2605.05175v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05175v1</guid><description>Perry E. Radau et al. — arxiv:2605.05175 — LLM Evaluation</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Text Corpora as Concept Fields: Black-Box Hallucination and Novelty Measurement</title><link>http://arxiv.org/abs/2605.05103v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05103v1</guid><description>Nicholas S. Kersting et al. — arxiv:2605.05103 — LLM Evaluation</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>SoK: Robustness in Large Language Models against Jailbreak Attacks</title><link>http://arxiv.org/abs/2605.05058v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05058v1</guid><description>Feiyue Xu et al. — arxiv:2605.05058 — LLM Evaluation</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>BenCSSmark: Making the Social Sciences Count in LLM Research</title><link>http://arxiv.org/abs/2605.04886v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04886v1</guid><description>Arnault Chatelain et al. — arxiv:2605.04886 — LLM Evaluation</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>AgentTrust: Runtime Safety Evaluation and Interception for AI Agent Tool Use</title><link>http://arxiv.org/abs/2605.04785v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04785v1</guid><description>Chenglin Yang et al. — arxiv:2605.04785 — LLM Evaluation</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>RLearner-LLM: Balancing Logical Grounding and Fluency in Large Language Models via Hybrid Direct Preference Optimization</title><link>http://arxiv.org/abs/2605.04539v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04539v1</guid><description>Qiming Bao et al. — arxiv:2605.04539 — LLM Evaluation</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>RaguTeam at SemEval-2026 Task 8: Meno and Friends in a Judge-Orchestrated LLM Ensemble for Faithful Multi-Turn Response Generation</title><link>http://arxiv.org/abs/2605.04523v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04523v1</guid><description>Ivan Bondarenko et al. — arxiv:2605.04523 — LLM Evaluation</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>DiffCap-Bench: A Comprehensive, Challenging, Robust Benchmark for Image Difference Captioning</title><link>http://arxiv.org/abs/2605.04503v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04503v1</guid><description>Yuancheng Wei et al. — arxiv:2605.04503 — LLM Evaluation</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Architectural Constraints Alignment in AI-assisted, Platform-based Service Development</title><link>http://arxiv.org/abs/2605.04973v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04973v1</guid><description>Julius Irion et al. — arxiv:2605.04973 — Code LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Delta-Based Neural Architecture Search: LLM Fine-Tuning via Code Diffs</title><link>http://arxiv.org/abs/2605.04903v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04903v1</guid><description>Santosh Premi Adhikari et al. — arxiv:2605.04903 — Code LLM</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Implicit Representations of Grammaticality in Language Models</title><link>http://arxiv.org/abs/2605.05197v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05197v1</guid><description>Yingshan Susan Wang et al. — arxiv:2605.05197 — Multilingual NLP</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Harnessing Linguistic Dissimilarity for Language Generalization on Unseen Low-Resource Varieties</title><link>http://arxiv.org/abs/2605.04500v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04500v1</guid><description>Jinju Kim et al. — arxiv:2605.04500 — Multilingual NLP</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>A Hybrid Method for Low-Resource Named Entity Recognition</title><link>http://arxiv.org/abs/2605.04489v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04489v1</guid><description>Do Minh Duc et al. — arxiv:2605.04489 — Multilingual NLP</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>DoGMaTiQ: Automated Generation of Question-and-Answer Nuggets for Report Evaluation</title><link>http://arxiv.org/abs/2605.04458v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04458v1</guid><description>Bryan Li et al. — arxiv:2605.04458 — Multilingual NLP</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>A Hybrid Method for Low-Resource Named Entity Recognition</title><link>http://arxiv.org/abs/2605.04489v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04489v1</guid><description>Do Minh Duc et al. — arxiv:2605.04489 — Named Entity Recognition</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Named Entity Recognition</category></item><item><title>A Hybrid Method for Low-Resource Named Entity Recognition</title><link>http://arxiv.org/abs/2605.04489v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04489v1</guid><description>Do Minh Duc et al. — arxiv:2605.04489 — Information Extraction</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>The First Token Knows: Single-Decode Confidence for Hallucination Detection</title><link>http://arxiv.org/abs/2605.05166v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05166v1</guid><description>Mina Gabriel et al. — arxiv:2605.05166 — Text Classification</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>The First Token Knows: Single-Decode Confidence for Hallucination Detection</title><link>http://arxiv.org/abs/2605.05166v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05166v1</guid><description>Mina Gabriel et al. — arxiv:2605.05166 — Question Answering</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>VTAgent: Agentic Keyframe Anchoring for Evidence-Aware Video TextVQA</title><link>http://arxiv.org/abs/2605.04870v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04870v1</guid><description>Haibin He et al. — arxiv:2605.04870 — Question Answering</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Tree-based Credit Assignment for Multi-Agent Memory System</title><link>http://arxiv.org/abs/2605.04811v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04811v1</guid><description>Marina Mao et al. — arxiv:2605.04811 — Question Answering</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Information Coordination as a Bridge: A Neuro-Symbolic Architecture for Reliable Autonomous Driving Scene Understanding</title><link>http://arxiv.org/abs/2605.04475v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04475v1</guid><description>Shuo Liu et al. — arxiv:2605.04475 — Question Answering</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>KEET: Explaining Performance of GPU Kernels Using LLM Agents</title><link>http://arxiv.org/abs/2605.04467v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04467v1</guid><description>Joshua H. Davis et al. — arxiv:2605.04467 — Question Answering</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>DoGMaTiQ: Automated Generation of Question-and-Answer Nuggets for Report Evaluation</title><link>http://arxiv.org/abs/2605.04458v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04458v1</guid><description>Bryan Li et al. — arxiv:2605.04458 — Question Answering</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Misrouter: Exploiting Routing Mechanisms for Input-Only Attacks on Mixture-of-Experts LLMs</title><link>http://arxiv.org/abs/2605.04446v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04446v1</guid><description>Zekun Fei et al. — arxiv:2605.04446 — Question Answering</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Telegraph English: Semantic Prompt Compression via Structured Symbolic Rewriting</title><link>http://arxiv.org/abs/2605.04426v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04426v1</guid><description>Mikhail L. Arbuzov et al. — arxiv:2605.04426 — Question Answering</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Sentiment Analysis and Customer Satisfaction Prediction on E-Commerce Platforms Based on YouTube Comments Using the XGBoost Algorithm</title><link>http://arxiv.org/abs/2605.04887v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04887v1</guid><description>Ridho Benedictus Togi Manik et al. — arxiv:2605.04887 — Sentiment Analysis</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Measuring Psychological States Through Semantic Projection: A Theory-Driven Approach to Language-Based Assessment</title><link>http://arxiv.org/abs/2605.04873v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04873v1</guid><description>Maria Luongo et al. — arxiv:2605.04873 — Sentiment Analysis</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>CHE-TKG: Collaborative Historical Evidence and Evolutionary Dynamics Learning for Temporal Knowledge Graph Reasoning</title><link>http://arxiv.org/abs/2605.04652v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04652v1</guid><description>Shuai-long Lei et al. — arxiv:2605.04652 — Knowledge Graph</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Graph-Augmented LLMs for Swiss MP Ideology Prediction</title><link>http://arxiv.org/abs/2605.04643v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04643v1</guid><description>Yifei Yuan et al. — arxiv:2605.04643 — Knowledge Graph</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>A Unified Benchmark for Evaluating Knowledge Graph Construction Methods and Graph Neural Networks</title><link>http://arxiv.org/abs/2605.05476v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05476v1</guid><description>Othmane Kabal et al. — arxiv:2605.05476 — Knowledge Graph</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Robustness of Graph Self-Supervised Learning to Real-World Noise: A Case Study on Text-Driven Biomedical Graphs</title><link>http://arxiv.org/abs/2605.05463v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.05463v1</guid><description>Othmane Kabal et al. — arxiv:2605.05463 — Knowledge Graph</description><pubDate>Wed, 06 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Natural Language Processing: A Comprehensive Practical Guide from Tokenisation to RLHF</title><link>http://arxiv.org/abs/2605.03799v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03799v1</guid><description>Mullosharaf K. Arabov et al. — arxiv:2605.03799 — NLP</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>SERE: Structural Example Retrieval for Enhancing LLMs in Event Causality Identification</title><link>http://arxiv.org/abs/2605.03701v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03701v1</guid><description>Zhifeng Hao et al. — arxiv:2605.03701 — NLP</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Annotation Quality in Aspect-Based Sentiment Analysis: A Case Study Comparing Experts, Students, Crowdworkers, and Large Language Model</title><link>http://arxiv.org/abs/2605.03624v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03624v1</guid><description>Niklas Donhauser et al. — arxiv:2605.03624 — NLP</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Retrieving Floods without Floodlights: Topic Models as Binary Classifiers for Extreme Climate Events in German News</title><link>http://arxiv.org/abs/2605.03450v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03450v1</guid><description>Brielen Madureira et al. — arxiv:2605.03450 — NLP</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Towards Self-Referential Analytic Assessment: A Profile-Based Approach to L2 Writing Evaluation with LLMs</title><link>http://arxiv.org/abs/2605.04298v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04298v1</guid><description>Stefano Bannò et al. — arxiv:2605.04298 — NLP</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Nsanku: Evaluating Zero-Shot Translation Performance of LLMs for Ghanaian Languages</title><link>http://arxiv.org/abs/2605.04208v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04208v1</guid><description>Stephen E. Moore et al. — arxiv:2605.04208 — NLP</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Large Language Models are Universal Reasoners for Visual Generation</title><link>http://arxiv.org/abs/2605.04040v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04040v1</guid><description>Sucheng Ren et al. — arxiv:2605.04040 — LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Safety and accuracy follow different scaling laws in clinical large language models</title><link>http://arxiv.org/abs/2605.04039v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04039v1</guid><description>Sebastian Wind et al. — arxiv:2605.04039 — LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>OpenSeeker-v2: Pushing the Limits of Search Agents with Informative and High-Difficulty Trajectories</title><link>http://arxiv.org/abs/2605.04036v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04036v1</guid><description>Yuwen Du et al. — arxiv:2605.04036 — LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Stayin&apos; Aligned Over Time: Towards Longitudinal Human-LLM Alignment via Contextual Reflection and Privacy-Preserving Behavioral Data</title><link>http://arxiv.org/abs/2605.04029v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04029v1</guid><description>Simret Araya Gebreegziabher et al. — arxiv:2605.04029 — LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>SymptomAI: Towards a Conversational AI Agent for Everyday Symptom Assessment</title><link>http://arxiv.org/abs/2605.04012v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04012v1</guid><description>Joseph Breda et al. — arxiv:2605.04012 — LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Physics-Grounded Multi-Agent Architecture for Traceable, Risk-Aware Human-AI Decision Support in Manufacturing</title><link>http://arxiv.org/abs/2605.04003v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04003v1</guid><description>Danny Hoang et al. — arxiv:2605.04003 — LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Mitigating False Positives in Static Memory Safety Analysis of Rust Programs via Reinforcement Learning</title><link>http://arxiv.org/abs/2605.04000v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04000v1</guid><description>P Akilesh et al. — arxiv:2605.04000 — LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>EQUITRIAGE: A Fairness Audit of Gender Bias in LLM-Based Emergency Department Triage</title><link>http://arxiv.org/abs/2605.03998v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03998v1</guid><description>Richard J. Young et al. — arxiv:2605.03998 — LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>From Intent to Execution: Composing Agentic Workflows with Agent Recommendation</title><link>http://arxiv.org/abs/2605.03986v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03986v1</guid><description>Kishan Athrey et al. — arxiv:2605.03986 — LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Logical Consistency as a Bridge: Improving LLM Hallucination Detection via Label Constraint Modeling between Responses and Self-Judgments</title><link>http://arxiv.org/abs/2605.03971v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03971v1</guid><description>Hao Mi et al. — arxiv:2605.03971 — LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Audio-Visual Intelligence in Large Foundation Models</title><link>http://arxiv.org/abs/2605.04045v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04045v1</guid><description>You Qin et al. — arxiv:2605.04045 — LLM Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Safety and accuracy follow different scaling laws in clinical large language models</title><link>http://arxiv.org/abs/2605.04039v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04039v1</guid><description>Sebastian Wind et al. — arxiv:2605.04039 — LLM Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Model order reduction for parametrized variational inequalities: application to crowd motion</title><link>http://arxiv.org/abs/2605.04037v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04037v1</guid><description>Giulia Sambataro et al. — arxiv:2605.04037 — LLM Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>OpenSeeker-v2: Pushing the Limits of Search Agents with Informative and High-Difficulty Trajectories</title><link>http://arxiv.org/abs/2605.04036v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04036v1</guid><description>Yuwen Du et al. — arxiv:2605.04036 — LLM Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Redefining AI Red Teaming in the Agentic Era: From Weeks to Hours</title><link>http://arxiv.org/abs/2605.04019v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04019v1</guid><description>Raja Sekhar Rao Dheekonda et al. — arxiv:2605.04019 — LLM Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Rethinking Reasoning-Intensive Retrieval: Evaluating and Advancing Retrievers in Agentic Search Systems</title><link>http://arxiv.org/abs/2605.04018v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04018v1</guid><description>Yilun Zhao et al. — arxiv:2605.04018 — LLM Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>SymptomAI: Towards a Conversational AI Agent for Everyday Symptom Assessment</title><link>http://arxiv.org/abs/2605.04012v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04012v1</guid><description>Joseph Breda et al. — arxiv:2605.04012 — LLM Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Physics-Grounded Multi-Agent Architecture for Traceable, Risk-Aware Human-AI Decision Support in Manufacturing</title><link>http://arxiv.org/abs/2605.04003v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04003v1</guid><description>Danny Hoang et al. — arxiv:2605.04003 — LLM Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Mitigating False Positives in Static Memory Safety Analysis of Rust Programs via Reinforcement Learning</title><link>http://arxiv.org/abs/2605.04000v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04000v1</guid><description>P Akilesh et al. — arxiv:2605.04000 — LLM Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>An Agent-Oriented Pluggable Experience-RAG Skill for Experience-Driven Retrieval Strategy Orchestration</title><link>http://arxiv.org/abs/2605.03989v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03989v1</guid><description>Dutao Zhang et al. — arxiv:2605.03989 — LLM Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Redefining AI Red Teaming in the Agentic Era: From Weeks to Hours</title><link>http://arxiv.org/abs/2605.04019v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04019v1</guid><description>Raja Sekhar Rao Dheekonda et al. — arxiv:2605.04019 — Multi-Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Rethinking Reasoning-Intensive Retrieval: Evaluating and Advancing Retrievers in Agentic Search Systems</title><link>http://arxiv.org/abs/2605.04018v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04018v1</guid><description>Yilun Zhao et al. — arxiv:2605.04018 — Multi-Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Physics-Grounded Multi-Agent Architecture for Traceable, Risk-Aware Human-AI Decision Support in Manufacturing</title><link>http://arxiv.org/abs/2605.04003v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04003v1</guid><description>Danny Hoang et al. — arxiv:2605.04003 — Multi-Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>An Agent-Oriented Pluggable Experience-RAG Skill for Experience-Driven Retrieval Strategy Orchestration</title><link>http://arxiv.org/abs/2605.03989v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03989v1</guid><description>Dutao Zhang et al. — arxiv:2605.03989 — Multi-Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>From Intent to Execution: Composing Agentic Workflows with Agent Recommendation</title><link>http://arxiv.org/abs/2605.03986v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03986v1</guid><description>Kishan Athrey et al. — arxiv:2605.03986 — Multi-Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Contextual Multi-Objective Optimization: Rethinking Objectives in Frontier AI Systems</title><link>http://arxiv.org/abs/2605.03900v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03900v1</guid><description>Jie Zhou et al. — arxiv:2605.03900 — Multi-Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>QKVShare: Quantized KV-Cache Handoff for Multi-Agent On-Device LLMs</title><link>http://arxiv.org/abs/2605.03884v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03884v1</guid><description>Pratik Honavar et al. — arxiv:2605.03884 — Multi-Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Mechanical Conscience: A Mathematical Framework for Dependability of Machine Intelligenc</title><link>http://arxiv.org/abs/2605.03847v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03847v1</guid><description>Munkhdegerekh Batzorig et al. — arxiv:2605.03847 — Multi-Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>SOAR: Real-Time Joint Optimization of Order Allocation and Robot Scheduling in Robotic Mobile Fulfillment Systems</title><link>http://arxiv.org/abs/2605.03842v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03842v1</guid><description>Yibang Tang et al. — arxiv:2605.03842 — Multi-Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>TRACE: A Metrologically-Grounded Engineering Framework for Trustworthy Agentic AI Systems in Operationally Critical Domains</title><link>http://arxiv.org/abs/2605.03838v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03838v1</guid><description>Serhii Zabolotnii et al. — arxiv:2605.03838 — Multi-Agent</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Safety and accuracy follow different scaling laws in clinical large language models</title><link>http://arxiv.org/abs/2605.04039v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04039v1</guid><description>Sebastian Wind et al. — arxiv:2605.04039 — RAG</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>An Agent-Oriented Pluggable Experience-RAG Skill for Experience-Driven Retrieval Strategy Orchestration</title><link>http://arxiv.org/abs/2605.03989v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03989v1</guid><description>Dutao Zhang et al. — arxiv:2605.03989 — RAG</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Beyond Rules: LLM-Powered Linting for Quantum Programs</title><link>http://arxiv.org/abs/2605.03943v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03943v1</guid><description>Pietro Cassieri et al. — arxiv:2605.03943 — RAG</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Natural Language Processing: A Comprehensive Practical Guide from Tokenisation to RLHF</title><link>http://arxiv.org/abs/2605.03799v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03799v1</guid><description>Mullosharaf K. Arabov et al. — arxiv:2605.03799 — RAG</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Enhancing Visual Question Answering with Multimodal LLMs via Chain-of-Question Guided Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.03790v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03790v1</guid><description>Quanxing Xu et al. — arxiv:2605.03790 — RAG</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Deep Graph-Language Fusion for Structure-Aware Code Generation</title><link>http://arxiv.org/abs/2605.03689v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03689v1</guid><description>Mert Tiftikci et al. — arxiv:2605.03689 — RAG</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>MEMTIER: Tiered Memory Architecture and Retrieval Bottleneck Analysis for Long-Running Autonomous AI Agents</title><link>http://arxiv.org/abs/2605.03675v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03675v1</guid><description>Bronislav Sidik et al. — arxiv:2605.03675 — RAG</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>SURE-RAG: Sufficiency and Uncertainty-Aware Evidence Verification for Selective Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.03534v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03534v1</guid><description>Jingxi Qiu et al. — arxiv:2605.03534 — RAG</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>From prompting to evidence-based translation: A RAG+prompt system for Japanese-Chinese translation and its pedagogical potential</title><link>http://arxiv.org/abs/2605.03387v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03387v1</guid><description>Wenshi Gu et al. — arxiv:2605.03387 — RAG</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>RAG over Thinking Traces Can Improve Reasoning Tasks</title><link>http://arxiv.org/abs/2605.03344v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03344v1</guid><description>Negar Arabzadeh et al. — arxiv:2605.03344 — RAG</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Large Language Models are Universal Reasoners for Visual Generation</title><link>http://arxiv.org/abs/2605.04040v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04040v1</guid><description>Sucheng Ren et al. — arxiv:2605.04040 — Reasoning</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>EQUITRIAGE: A Fairness Audit of Gender Bias in LLM-Based Emergency Department Triage</title><link>http://arxiv.org/abs/2605.03998v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03998v1</guid><description>Richard J. Young et al. — arxiv:2605.03998 — Reasoning</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Beyond Rules: LLM-Powered Linting for Quantum Programs</title><link>http://arxiv.org/abs/2605.03943v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03943v1</guid><description>Pietro Cassieri et al. — arxiv:2605.03943 — Reasoning</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Enhancing Visual Question Answering with Multimodal LLMs via Chain-of-Question Guided Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.03790v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03790v1</guid><description>Quanxing Xu et al. — arxiv:2605.03790 — Reasoning</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Say the Mission, Execute the Swarm: Agent-Enhanced LLM Reasoning in the Web-of-Drones</title><link>http://arxiv.org/abs/2605.03788v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03788v1</guid><description>Andrea Iannoli et al. — arxiv:2605.03788 — Reasoning</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Rose-SQL: Role-State Evolution Guided Structured Reasoning for Multi-Turn Text-to-SQL</title><link>http://arxiv.org/abs/2605.03720v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03720v1</guid><description>Le Zhou et al. — arxiv:2605.03720 — Reasoning</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>AgenticPosesRanker: An Agentic AI Framework for Physically Grounded Ranking of Protein-Ligand Docking Poses</title><link>http://arxiv.org/abs/2605.03707v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03707v1</guid><description>Sofiene Khiari et al. — arxiv:2605.03707 — Reasoning</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>BIT.UA-AAUBS at ArchEHR-QA 2026: Evaluating Open-Source and Proprietary LLMs via Prompting in Low-Resource QA</title><link>http://arxiv.org/abs/2605.03618v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03618v1</guid><description>Richard A. A. Jonker et al. — arxiv:2605.03618 — Reasoning</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>FinSTaR: Towards Financial Reasoning with Time Series Reasoning Models</title><link>http://arxiv.org/abs/2605.03460v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03460v1</guid><description>Seunghan Lee et al. — arxiv:2605.03460 — Reasoning</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>DGPO: Distribution Guided Policy Optimization for Fine Grained Credit Assignment</title><link>http://arxiv.org/abs/2605.03327v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03327v1</guid><description>Hongbo Jin et al. — arxiv:2605.03327 — Reasoning</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Contextual Multi-Objective Optimization: Rethinking Objectives in Frontier AI Systems</title><link>http://arxiv.org/abs/2605.03900v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03900v1</guid><description>Jie Zhou et al. — arxiv:2605.03900 — Tool Use</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>GeoDecider: A Coarse-to-Fine Agentic Workflow for Explainable Lithology Classification</title><link>http://arxiv.org/abs/2605.03383v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03383v1</guid><description>Jiahao Wang et al. — arxiv:2605.03383 — Tool Use</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>ARGUS: Defending LLM Agents Against Context-Aware Prompt Injection</title><link>http://arxiv.org/abs/2605.03378v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03378v1</guid><description>Shihao Weng et al. — arxiv:2605.03378 — Tool Use</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Revisiting the Travel Planning Capabilities of Large Language Models</title><link>http://arxiv.org/abs/2605.03308v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03308v1</guid><description>Bo-Wen Zhang et al. — arxiv:2605.03308 — Tool Use</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Enhancing Agent Safety Judgment: Controlled Benchmark Rewriting and Analogical Reasoning for Deceptive Out-of-Distribution Scenarios</title><link>http://arxiv.org/abs/2605.03242v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03242v1</guid><description>Zuoyu Zhang et al. — arxiv:2605.03242 — Tool Use</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Overview of the New Hubble Spectroscopic Legacy Archive</title><link>http://arxiv.org/abs/2605.04167v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04167v1</guid><description>Ravi Sankrit et al. — arxiv:2605.04167 — Tool Use</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Frontier Lag: A Bibliometric Audit of Capability Misrepresentation in Academic AI Evaluation</title><link>http://arxiv.org/abs/2605.04135v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04135v1</guid><description>David Gringras et al. — arxiv:2605.04135 — Tool Use</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>StateVLM: A State-Aware Vision-Language Model for Robotic Affordance Reasoning</title><link>http://arxiv.org/abs/2605.03927v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03927v1</guid><description>Xiaowen Sun et al. — arxiv:2605.03927 — Multimodal LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Quantifying the human visual exposome with vision language models</title><link>http://arxiv.org/abs/2605.03863v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03863v1</guid><description>Christian Rominger et al. — arxiv:2605.03863 — Multimodal LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>RoboAlign-R1: Distilled Multimodal Reward Alignment for Robot Video World Models</title><link>http://arxiv.org/abs/2605.03821v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03821v1</guid><description>Hao Wu et al. — arxiv:2605.03821 — Multimodal LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>ScrapMem: A Bio-inspired Framework for On-device Personalized Agent Memory via Optical Forgetting</title><link>http://arxiv.org/abs/2605.03804v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03804v1</guid><description>Jiale Chang et al. — arxiv:2605.03804 — Multimodal LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Enhancing Visual Question Answering with Multimodal LLMs via Chain-of-Question Guided Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.03790v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03790v1</guid><description>Quanxing Xu et al. — arxiv:2605.03790 — Multimodal LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>What You Think is What You See: Driving Exploration in VLM Agents via Visual-Linguistic Curiosity</title><link>http://arxiv.org/abs/2605.03782v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03782v1</guid><description>Haoxi Li et al. — arxiv:2605.03782 — Multimodal LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Before Forgetting, Learn to Remember: Revisiting Foundational Learning Failures in LVLM Unlearning Benchmarks</title><link>http://arxiv.org/abs/2605.03759v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03759v1</guid><description>JuneHyoung Kwon et al. — arxiv:2605.03759 — Multimodal LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Uni-OPD: Unifying On-Policy Distillation with a Dual-Perspective Recipe</title><link>http://arxiv.org/abs/2605.03677v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03677v1</guid><description>Wenjin Hou et al. — arxiv:2605.03677 — Multimodal LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>The Detector Teaches Itself: Lightweight Self-Supervised Adaptation for Open-Vocabulary Object Detection</title><link>http://arxiv.org/abs/2605.03642v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03642v1</guid><description>Yazhe Wan et al. — arxiv:2605.03642 — Multimodal LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Erase Persona, Forget Lore: Benchmarking Multimodal Copyright Unlearning in Large Vision Language Models</title><link>http://arxiv.org/abs/2605.03547v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03547v1</guid><description>JuneHyoung Kwon et al. — arxiv:2605.03547 — Multimodal LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Contextual Multi-Objective Optimization: Rethinking Objectives in Frontier AI Systems</title><link>http://arxiv.org/abs/2605.03900v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03900v1</guid><description>Jie Zhou et al. — arxiv:2605.03900 — Long Context</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>RoboAlign-R1: Distilled Multimodal Reward Alignment for Robot Video World Models</title><link>http://arxiv.org/abs/2605.03821v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03821v1</guid><description>Hao Wu et al. — arxiv:2605.03821 — Long Context</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Say the Mission, Execute the Swarm: Agent-Enhanced LLM Reasoning in the Web-of-Drones</title><link>http://arxiv.org/abs/2605.03788v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03788v1</guid><description>Andrea Iannoli et al. — arxiv:2605.03788 — Long Context</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Rose-SQL: Role-State Evolution Guided Structured Reasoning for Multi-Turn Text-to-SQL</title><link>http://arxiv.org/abs/2605.03720v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03720v1</guid><description>Le Zhou et al. — arxiv:2605.03720 — Long Context</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>MEMTIER: Tiered Memory Architecture and Retrieval Bottleneck Analysis for Long-Running Autonomous AI Agents</title><link>http://arxiv.org/abs/2605.03675v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03675v1</guid><description>Bronislav Sidik et al. — arxiv:2605.03675 — Long Context</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>AdapShot: Adaptive Many-Shot In-Context Learning with Semantic-Aware KV Cache Reuse</title><link>http://arxiv.org/abs/2605.03644v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03644v1</guid><description>Jie Ou et al. — arxiv:2605.03644 — Long Context</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Tutti: Making SSD-Backed KV Cache Practical for Long-Context LLM Serving</title><link>http://arxiv.org/abs/2605.03375v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03375v1</guid><description>Shi Qiu et al. — arxiv:2605.03375 — Long Context</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>MemFlow: Intent-Driven Memory Orchestration for Small Language Model Agents</title><link>http://arxiv.org/abs/2605.03312v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03312v1</guid><description>Jiayi Chen et al. — arxiv:2605.03312 — Long Context</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Revisiting the Travel Planning Capabilities of Large Language Models</title><link>http://arxiv.org/abs/2605.03308v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03308v1</guid><description>Bo-Wen Zhang et al. — arxiv:2605.03308 — Long Context</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Exploring Sustainability in Scientific Software through Code Quality &amp; Test Coverage Metrics</title><link>http://arxiv.org/abs/2605.03243v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03243v1</guid><description>Sheikh Md. Mushfiqur Rahman et al. — arxiv:2605.03243 — Long Context</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Rethinking Reasoning-Intensive Retrieval: Evaluating and Advancing Retrievers in Agentic Search Systems</title><link>http://arxiv.org/abs/2605.04018v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04018v1</guid><description>Yilun Zhao et al. — arxiv:2605.04018 — LLM Efficiency</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Nonlinear Compton scattering in a frequency-modulated field</title><link>http://arxiv.org/abs/2605.04011v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04011v1</guid><description>Antonino Di Piazza et al. — arxiv:2605.04011 — LLM Efficiency</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>RD-ViT: Recurrent-Depth Vision Transformer for Semantic Segmentation with Reduced Data Dependence Extending the Recurrent-Depth Transformer Architecture to Dense Prediction</title><link>http://arxiv.org/abs/2605.03999v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03999v1</guid><description>Renjie He et al. — arxiv:2605.03999 — LLM Efficiency</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>QKVShare: Quantized KV-Cache Handoff for Multi-Agent On-Device LLMs</title><link>http://arxiv.org/abs/2605.03884v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03884v1</guid><description>Pratik Honavar et al. — arxiv:2605.03884 — LLM Efficiency</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Path integral quantization of the electromagnetic field in nonlinear dielectric materials</title><link>http://arxiv.org/abs/2605.03836v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03836v1</guid><description>Arman Kashef et al. — arxiv:2605.03836 — LLM Efficiency</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>A density-matrix derivation of the Hartree--Fock equations in a nonorthogonal atomic-orbital basis</title><link>http://arxiv.org/abs/2605.03761v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03761v1</guid><description>Thomas Kjærgaard et al. — arxiv:2605.03761 — LLM Efficiency</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Benchmarking Parameter-Efficient Fine-Tuning of Large Language Models for Low-Resource Tajik Text Generation with the Tajik Web Corpus</title><link>http://arxiv.org/abs/2605.03742v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03742v1</guid><description>Mullosharaf K. Arabov et al. — arxiv:2605.03742 — LLM Efficiency</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Rethinking the Rank Threshold for LoRA Fine-Tuning</title><link>http://arxiv.org/abs/2605.03724v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03724v1</guid><description>Juneyoung Park et al. — arxiv:2605.03724 — LLM Efficiency</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>From Code to Prediction: Fine-Tuning LLMs for Neural Network Performance Classification in NNGPT</title><link>http://arxiv.org/abs/2605.03686v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03686v1</guid><description>Mahmoud Hanouneh et al. — arxiv:2605.03686 — LLM Efficiency</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>PriorNet: Prior-Guided Engagement Estimation from Face Video</title><link>http://arxiv.org/abs/2605.03615v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03615v1</guid><description>Alexander Vedernikov et al. — arxiv:2605.03615 — LLM Efficiency</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Stayin&apos; Aligned Over Time: Towards Longitudinal Human-LLM Alignment via Contextual Reflection and Privacy-Preserving Behavioral Data</title><link>http://arxiv.org/abs/2605.04029v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04029v1</guid><description>Simret Araya Gebreegziabher et al. — arxiv:2605.04029 — Alignment</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Natural Language Processing: A Comprehensive Practical Guide from Tokenisation to RLHF</title><link>http://arxiv.org/abs/2605.03799v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03799v1</guid><description>Mullosharaf K. Arabov et al. — arxiv:2605.03799 — Alignment</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>QUIVER: Cost-Aware Adaptive Preference Querying in Surrogate-Assisted Evolutionary Multi-Objective Optimization</title><link>http://arxiv.org/abs/2605.04267v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04267v1</guid><description>Florian A. D. Burnat et al. — arxiv:2605.04267 — Alignment</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Explaining and Preventing Alignment Collapse in Iterative RLHF</title><link>http://arxiv.org/abs/2605.04266v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04266v1</guid><description>Etienne Gauthier et al. — arxiv:2605.04266 — Alignment</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Self-Prompting Small Language Models for Privacy-Sensitive Clinical Information Extraction</title><link>http://arxiv.org/abs/2605.04221v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04221v1</guid><description>Yao-Shun Chuang et al. — arxiv:2605.04221 — Alignment</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Large Language Models are Universal Reasoners for Visual Generation</title><link>http://arxiv.org/abs/2605.04040v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04040v1</guid><description>Sucheng Ren et al. — arxiv:2605.04040 — Hallucination</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Logical Consistency as a Bridge: Improving LLM Hallucination Detection via Label Constraint Modeling between Responses and Self-Judgments</title><link>http://arxiv.org/abs/2605.03971v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03971v1</guid><description>Hao Mi et al. — arxiv:2605.03971 — Hallucination</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>An extensive theory of nonlinearly intercoupled pseudomodes for noise model reduction in circuit QED</title><link>http://arxiv.org/abs/2605.03946v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03946v1</guid><description>M. Gabriela Boada G. et al. — arxiv:2605.03946 — Hallucination</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Steer Like the LLM: Activation Steering that Mimics Prompting</title><link>http://arxiv.org/abs/2605.03907v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03907v1</guid><description>Geert Heyman et al. — arxiv:2605.03907 — Hallucination</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Deco: Extending Personal Physical Objects into Pervasive AI Companion through a Dual-Embodiment Framework</title><link>http://arxiv.org/abs/2605.03882v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03882v1</guid><description>Zhihan Jiang et al. — arxiv:2605.03882 — Hallucination</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Correct Is Not Enough: Training Reasoning Planners with Executor-Grounded Rewards</title><link>http://arxiv.org/abs/2605.03862v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03862v1</guid><description>Tianyang Han et al. — arxiv:2605.03862 — Hallucination</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>TriBench-Ko: Evaluating LLM Risks in Judicial Workflows</title><link>http://arxiv.org/abs/2605.03792v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03792v1</guid><description>Haesung Lee et al. — arxiv:2605.03792 — Hallucination</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>GeoTopoDiff: Learning Geometry--Topology Graph Priors through Boundary-Constrained Mixed Diffusion for Sparse-Slice 3D Porous Reconstruction</title><link>http://arxiv.org/abs/2605.03764v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03764v1</guid><description>Yue Shi et al. — arxiv:2605.03764 — Hallucination</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>FluxFlow: Conservative Flow-Matching for Astronomical Image Super-Resolution</title><link>http://arxiv.org/abs/2605.03749v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03749v1</guid><description>Shuhong Liu et al. — arxiv:2605.03749 — Hallucination</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>SERE: Structural Example Retrieval for Enhancing LLMs in Event Causality Identification</title><link>http://arxiv.org/abs/2605.03701v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03701v1</guid><description>Zhifeng Hao et al. — arxiv:2605.03701 — Hallucination</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Safety and accuracy follow different scaling laws in clinical large language models</title><link>http://arxiv.org/abs/2605.04039v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04039v1</guid><description>Sebastian Wind et al. — arxiv:2605.04039 — LLM Safety</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Redefining AI Red Teaming in the Agentic Era: From Weeks to Hours</title><link>http://arxiv.org/abs/2605.04019v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04019v1</guid><description>Raja Sekhar Rao Dheekonda et al. — arxiv:2605.04019 — LLM Safety</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Real-Time Evaluation of Autonomous Systems under Adversarial Attacks</title><link>http://arxiv.org/abs/2605.03491v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03491v1</guid><description>Adithya Mohan et al. — arxiv:2605.03491 — LLM Safety</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Exposing LLM Safety Gaps Through Mathematical Encoding:New Attacks and Systematic Analysis</title><link>http://arxiv.org/abs/2605.03441v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03441v1</guid><description>Haoyu Zhang et al. — arxiv:2605.03441 — LLM Safety</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>TsallisPGD: Adaptive Gradient Weighting for Adversarial Attacks on Semantic Segmentation</title><link>http://arxiv.org/abs/2605.03405v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03405v1</guid><description>Alexander Matyasko et al. — arxiv:2605.03405 — LLM Safety</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Enhancing Agent Safety Judgment: Controlled Benchmark Rewriting and Analogical Reasoning for Deceptive Out-of-Distribution Scenarios</title><link>http://arxiv.org/abs/2605.03242v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03242v1</guid><description>Zuoyu Zhang et al. — arxiv:2605.03242 — LLM Safety</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Laundering AI Authority with Adversarial Examples</title><link>http://arxiv.org/abs/2605.04261v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04261v1</guid><description>Jie Zhang et al. — arxiv:2605.04261 — LLM Safety</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>MCJudgeBench: A Benchmark for Constraint-Level Judge Evaluation in Multi-Constraint Instruction Following</title><link>http://arxiv.org/abs/2605.03858v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03858v1</guid><description>Jaeyun Lee et al. — arxiv:2605.03858 — LLM Evaluation</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Evaluating Generative Models as Interactive Emergent Representations of Human-Like Collaborative Behavior</title><link>http://arxiv.org/abs/2605.03855v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03855v1</guid><description>Shinas Shaji et al. — arxiv:2605.03855 — LLM Evaluation</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>BIT.UA-AAUBS at ArchEHR-QA 2026: Evaluating Open-Source and Proprietary LLMs via Prompting in Low-Resource QA</title><link>http://arxiv.org/abs/2605.03618v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03618v1</guid><description>Richard A. A. Jonker et al. — arxiv:2605.03618 — LLM Evaluation</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Detecting Stealth Sycophancy in Mental-Health Dialogue with Dynamic Emotional Signature Graphs</title><link>http://arxiv.org/abs/2605.03472v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03472v1</guid><description>Tianze Han et al. — arxiv:2605.03472 — LLM Evaluation</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>LLM-ADAM: A Generalizable LLM Agent Framework for Pre-Print Anomaly Detection in Additive Manufacturing</title><link>http://arxiv.org/abs/2605.03328v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03328v1</guid><description>Ahmadreza Eslaminia et al. — arxiv:2605.03328 — LLM Evaluation</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Enhancing Agent Safety Judgment: Controlled Benchmark Rewriting and Analogical Reasoning for Deceptive Out-of-Distribution Scenarios</title><link>http://arxiv.org/abs/2605.03242v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03242v1</guid><description>Zuoyu Zhang et al. — arxiv:2605.03242 — LLM Evaluation</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>FlowEval: Reference-based Evaluation of Generated User Interfaces</title><link>http://arxiv.org/abs/2605.04165v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04165v1</guid><description>Jason Wu et al. — arxiv:2605.04165 — LLM Evaluation</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Contextual Multi-Objective Optimization: Rethinking Objectives in Frontier AI Systems</title><link>http://arxiv.org/abs/2605.03900v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03900v1</guid><description>Jie Zhou et al. — arxiv:2605.03900 — Code LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Say the Mission, Execute the Swarm: Agent-Enhanced LLM Reasoning in the Web-of-Drones</title><link>http://arxiv.org/abs/2605.03788v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03788v1</guid><description>Andrea Iannoli et al. — arxiv:2605.03788 — Code LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Rose-SQL: Role-State Evolution Guided Structured Reasoning for Multi-Turn Text-to-SQL</title><link>http://arxiv.org/abs/2605.03720v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03720v1</guid><description>Le Zhou et al. — arxiv:2605.03720 — Code LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Deep Graph-Language Fusion for Structure-Aware Code Generation</title><link>http://arxiv.org/abs/2605.03689v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03689v1</guid><description>Mert Tiftikci et al. — arxiv:2605.03689 — Code LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>From Code to Prediction: Fine-Tuning LLMs for Neural Network Performance Classification in NNGPT</title><link>http://arxiv.org/abs/2605.03686v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03686v2</guid><description>Mahmoud Hanouneh et al. — arxiv:2605.03686 — Code LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>RAG over Thinking Traces Can Improve Reasoning Tasks</title><link>http://arxiv.org/abs/2605.03344v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03344v1</guid><description>Negar Arabzadeh et al. — arxiv:2605.03344 — Code LLM</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>MedFabric and EtHER: A Data-Centric Framework for Word-Level Fabrication Generation and Detection in Medical LLMs</title><link>http://arxiv.org/abs/2605.04180v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04180v1</guid><description>Tung Sum Thomas Kwok et al. — arxiv:2605.04180 — Medical NLP</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Medical NLP</category></item><item><title>Natural Language Processing: A Comprehensive Practical Guide from Tokenisation to RLHF</title><link>http://arxiv.org/abs/2605.03799v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03799v1</guid><description>Mullosharaf K. Arabov et al. — arxiv:2605.03799 — Multilingual NLP</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Benchmarking Parameter-Efficient Fine-Tuning of Large Language Models for Low-Resource Tajik Text Generation with the Tajik Web Corpus</title><link>http://arxiv.org/abs/2605.03742v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03742v1</guid><description>Mullosharaf K. Arabov et al. — arxiv:2605.03742 — Multilingual NLP</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>LLM-XTM: Enhancing Cross-Lingual Topic Models with Large Language Models</title><link>http://arxiv.org/abs/2605.03299v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03299v1</guid><description>Minh Chu Xuan et al. — arxiv:2605.03299 — Multilingual NLP</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>SAM-NER: Semantic Archetype Mediation for Zero-Shot Named Entity Recognition</title><link>http://arxiv.org/abs/2605.03706v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03706v1</guid><description>Ruichu Cai et al. — arxiv:2605.03706 — Named Entity Recognition</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Named Entity Recognition</category></item><item><title>Geolocating News about Extreme Climate Events: A Comparative Analysis of Off-the-Shelf Tools for Toponym Identification in German</title><link>http://arxiv.org/abs/2605.03414v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03414v1</guid><description>Brielen Madureira et al. — arxiv:2605.03414 — Named Entity Recognition</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Named Entity Recognition</category></item><item><title>Self-Prompting Small Language Models for Privacy-Sensitive Clinical Information Extraction</title><link>http://arxiv.org/abs/2605.04221v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04221v1</guid><description>Yao-Shun Chuang et al. — arxiv:2605.04221 — Named Entity Recognition</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Named Entity Recognition</category></item><item><title>CC-OCR V2: Benchmarking Large Multimodal Models for Literacy in Real-world Document Processing</title><link>http://arxiv.org/abs/2605.03903v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03903v1</guid><description>Zhipeng Xu et al. — arxiv:2605.03903 — Information Extraction</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>Geolocating News about Extreme Climate Events: A Comparative Analysis of Off-the-Shelf Tools for Toponym Identification in German</title><link>http://arxiv.org/abs/2605.03414v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03414v1</guid><description>Brielen Madureira et al. — arxiv:2605.03414 — Information Extraction</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>Material Database Agent: A Multimodal Agentic Framework for Scientific Literature Mining</title><link>http://arxiv.org/abs/2605.04278v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04278v1</guid><description>Achuth Chandrasekhar et al. — arxiv:2605.04278 — Information Extraction</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>Self-Prompting Small Language Models for Privacy-Sensitive Clinical Information Extraction</title><link>http://arxiv.org/abs/2605.04221v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04221v1</guid><description>Yao-Shun Chuang et al. — arxiv:2605.04221 — Information Extraction</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>An Agent-Oriented Pluggable Experience-RAG Skill for Experience-Driven Retrieval Strategy Orchestration</title><link>http://arxiv.org/abs/2605.03989v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03989v1</guid><description>Dutao Zhang et al. — arxiv:2605.03989 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Magic-Informed Quantum Architecture Search</title><link>http://arxiv.org/abs/2605.03932v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03932v1</guid><description>Vincenzo Lipardi et al. — arxiv:2605.03932 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>CC-OCR V2: Benchmarking Large Multimodal Models for Literacy in Real-world Document Processing</title><link>http://arxiv.org/abs/2605.03903v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03903v1</guid><description>Zhipeng Xu et al. — arxiv:2605.03903 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Enhancing Visual Question Answering with Multimodal LLMs via Chain-of-Question Guided Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.03790v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03790v1</guid><description>Quanxing Xu et al. — arxiv:2605.03790 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Before Forgetting, Learn to Remember: Revisiting Foundational Learning Failures in LVLM Unlearning Benchmarks</title><link>http://arxiv.org/abs/2605.03759v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03759v1</guid><description>JuneHyoung Kwon et al. — arxiv:2605.03759 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Geographic Variation in Stack Overflow Code Quality: Evidence from a Cross-Regional Study of Coding Practices</title><link>http://arxiv.org/abs/2605.03670v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03670v1</guid><description>Elijah Zolduoarrati et al. — arxiv:2605.03670 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>BIT.UA-AAUBS at ArchEHR-QA 2026: Evaluating Open-Source and Proprietary LLMs via Prompting in Low-Resource QA</title><link>http://arxiv.org/abs/2605.03618v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03618v1</guid><description>Richard A. A. Jonker et al. — arxiv:2605.03618 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>DALPHIN: Benchmarking Digital Pathology AI Copilots Against Pathologists on an Open Multicentric Dataset</title><link>http://arxiv.org/abs/2605.03544v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03544v1</guid><description>Carlijn Lems et al. — arxiv:2605.03544 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>WorldJen: An End-to-End Multi-Dimensional Benchmark for Generative Video Models</title><link>http://arxiv.org/abs/2605.03475v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03475v1</guid><description>Karthik Inbasekar et al. — arxiv:2605.03475 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>VEBench:Benchmarking Large Multimodal Models for Real-World Video Editing</title><link>http://arxiv.org/abs/2605.03276v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03276v1</guid><description>Andong Deng et al. — arxiv:2605.03276 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Hierarchical Visual Agent: Managing Contexts in Joint Image-Text Space for Advanced Chart Reasoning</title><link>http://arxiv.org/abs/2605.04304v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04304v1</guid><description>Qihua Dong et al. — arxiv:2605.04304 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Temporal Reasoning Is Not the Bottleneck: A Probabilistic Inconsistency Framework for Neuro-Symbolic QA</title><link>http://arxiv.org/abs/2605.04243v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04243v1</guid><description>Tran Quang Liem et al. — arxiv:2605.04243 — Question Answering</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Annotation Quality in Aspect-Based Sentiment Analysis: A Case Study Comparing Experts, Students, Crowdworkers, and Large Language Model</title><link>http://arxiv.org/abs/2605.03624v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03624v1</guid><description>Niklas Donhauser et al. — arxiv:2605.03624 — Sentiment Analysis</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Sentiment Analysis of Indonesian Spotify Reviews Using Machine Learning and BiLSTM</title><link>http://arxiv.org/abs/2605.03443v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03443v1</guid><description>Uliano Wilyam Purba et al. — arxiv:2605.03443 — Sentiment Analysis</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>A Comparison of Traditional Machine Learning Algorithms and LSTM-Based Deep Learning Models for Email Sentiment Analysis</title><link>http://arxiv.org/abs/2605.03440v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03440v1</guid><description>Virdio Samuel Saragih et al. — arxiv:2605.03440 — Sentiment Analysis</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Benchmarking Logistic Regression, SVM, Naive Bayes, and IndoBERT Fine-Tuning for Sentiment Analysis on Indonesian Product Reviews</title><link>http://arxiv.org/abs/2605.03439v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03439v1</guid><description>Nabila Zakiyah Zahra et al. — arxiv:2605.03439 — Sentiment Analysis</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Science discussions of retracted articles on Bluesky: public scrutiny or misinformation spreading?</title><link>http://arxiv.org/abs/2605.04334v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04334v1</guid><description>Er-Te Zheng et al. — arxiv:2605.04334 — Sentiment Analysis</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>OpenSeeker-v2: Pushing the Limits of Search Agents with Informative and High-Difficulty Trajectories</title><link>http://arxiv.org/abs/2605.04036v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04036v1</guid><description>Yuwen Du et al. — arxiv:2605.04036 — Knowledge Graph</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Physics-Grounded Multi-Agent Architecture for Traceable, Risk-Aware Human-AI Decision Support in Manufacturing</title><link>http://arxiv.org/abs/2605.04003v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.04003v1</guid><description>Danny Hoang et al. — arxiv:2605.04003 — Knowledge Graph</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>ConRAD: Conformal Risk-Aware Neural Databases</title><link>http://arxiv.org/abs/2605.03806v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03806v1</guid><description>Sonia Horchidan et al. — arxiv:2605.03806 — Knowledge Graph</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Graph Neural Network based Hierarchy-Aware Embeddings of Knowledge Graphs: Applications to Yeast Phenotype Prediction</title><link>http://arxiv.org/abs/2605.03690v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03690v1</guid><description>Filip Kronström et al. — arxiv:2605.03690 — Knowledge Graph</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>CuraView: A Multi-Agent Framework for Medical Hallucination Detection with GraphRAG-Enhanced Knowledge Verification</title><link>http://arxiv.org/abs/2605.03476v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03476v1</guid><description>Severin Ye et al. — arxiv:2605.03476 — Knowledge Graph</description><pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>AcademiClaw: When Students Set Challenges for AI Agents</title><link>http://arxiv.org/abs/2605.02661v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02661v1</guid><description>Junjie Yu et al. — arxiv:2605.02661 — NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Beating the Style Detector: Three Hours of Agentic Research on the AI-Text Arms Race</title><link>http://arxiv.org/abs/2605.02620v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02620v1</guid><description>Andreas Maier et al. — arxiv:2605.02620 — NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>SemEval-2026 Task 7: Everyday Knowledge Across Diverse Languages and Cultures</title><link>http://arxiv.org/abs/2605.02601v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02601v1</guid><description>Nedjma Ousidhoum et al. — arxiv:2605.02601 — NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Revisiting Semantic Role Labeling: Efficient Structured Inference with Dependency-Informed Analysis</title><link>http://arxiv.org/abs/2605.02505v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02505v1</guid><description>Sangpil Youm et al. — arxiv:2605.02505 — NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Shadow-Loom: Causal Reasoning over Graphical World Model of Narratives</title><link>http://arxiv.org/abs/2605.02475v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02475v1</guid><description>David Wilmot et al. — arxiv:2605.02475 — NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>HalluScan: A Systematic Benchmark for Detecting and Mitigating Hallucinations in Instruction-Following LLMs</title><link>http://arxiv.org/abs/2605.02443v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02443v1</guid><description>Ahmed Cherif et al. — arxiv:2605.02443 — NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Controllable and Verifiable Process Data Synthesis for Process Reward Models</title><link>http://arxiv.org/abs/2605.02395v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02395v1</guid><description>Yinghui Chi et al. — arxiv:2605.02395 — NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Semantically Enriching Investor Micro-blogs for Opinion-Aware Emotion Analysis: A Practical Approach</title><link>http://arxiv.org/abs/2605.03092v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03092v1</guid><description>Gaurav Negi et al. — arxiv:2605.03092 — NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>EvoPoC: Automated Exploit Synthesis for DeFi Smart Contracts via Hierarchical Knowledge Graphs</title><link>http://arxiv.org/abs/2605.02868v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02868v1</guid><description>Ruichao Liang et al. — arxiv:2605.02868 — LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Semantic Risk-Aware Heuristic Planning for Robotic Navigation in Dynamic Environments: An LLM-Inspired Approach</title><link>http://arxiv.org/abs/2605.02862v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02862v1</guid><description>Hamza Ahmed Durrani et al. — arxiv:2605.02862 — LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Standing on the Shoulders of Giants: Stabilized Knowledge Distillation for Cross--Language Code Clone Detection</title><link>http://arxiv.org/abs/2605.02860v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02860v1</guid><description>Mohamad Khajezade et al. — arxiv:2605.02860 — LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>When Is the Same Model Not the Same Service? A Measurement Study of Hosted Open-Weight LLM APIs</title><link>http://arxiv.org/abs/2605.02821v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02821v1</guid><description>Haorui Li et al. — arxiv:2605.02821 — LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>SCPRM: A Schema-aware Cumulative Process Reward Model for Knowledge Graph Question Answering</title><link>http://arxiv.org/abs/2605.02819v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02819v1</guid><description>Jiujiu Chen et al. — arxiv:2605.02819 — LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Autonomous LLM Agent Worms: Cross-Platform Propagation, Automated Discovery and Temporal Re-Entry Defense</title><link>http://arxiv.org/abs/2605.02812v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02812v1</guid><description>Mingming Zha et al. — arxiv:2605.02812 — LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>AIs and Humans with Agency</title><link>http://arxiv.org/abs/2605.02810v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02810v1</guid><description>David Mumford et al. — arxiv:2605.02810 — LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Reinforcement Learning for LLM-based Multi-Agent Systems through Orchestration Traces</title><link>http://arxiv.org/abs/2605.02801v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02801v1</guid><description>Chenchen Zhang et al. — arxiv:2605.02801 — LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>FunFuzz: An LLM-Powered Evolutionary Fuzzing Framework</title><link>http://arxiv.org/abs/2605.02789v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02789v1</guid><description>Mario Rodríguez Béjar et al. — arxiv:2605.02789 — LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>U-Define: Designing User Workflows for Hard and Soft Constraints in LLM-Based Planning</title><link>http://arxiv.org/abs/2605.02765v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02765v1</guid><description>Christine P Lee et al. — arxiv:2605.02765 — LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>SpecKV: Adaptive Speculative Decoding with Compression-Aware Gamma Selection</title><link>http://arxiv.org/abs/2605.02888v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02888v1</guid><description>Shikhar Shukla et al. — arxiv:2605.02888 — LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>EvoPoC: Automated Exploit Synthesis for DeFi Smart Contracts via Hierarchical Knowledge Graphs</title><link>http://arxiv.org/abs/2605.02868v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02868v1</guid><description>Ruichao Liang et al. — arxiv:2605.02868 — LLM Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Uncountably many conditionally inaccessible decisions exist in every finite probability space</title><link>http://arxiv.org/abs/2605.02865v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02865v1</guid><description>Zalán Gyenis et al. — arxiv:2605.02865 — LLM Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>HAAS: A Policy-Aware Framework for Adaptive Task Allocation Between Humans and Artificial Intelligence Systems</title><link>http://arxiv.org/abs/2605.02832v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02832v1</guid><description>Vicente Pelechanoa et al. — arxiv:2605.02832 — LLM Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Equilibrium Stability and Uniqueness with a Large Number of Commodities and Patient Consumers</title><link>http://arxiv.org/abs/2605.02817v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02817v1</guid><description>Xinyang Wang et al. — arxiv:2605.02817 — LLM Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>FlexSQL: Flexible Exploration and Execution Make Better Text-to-SQL Agents</title><link>http://arxiv.org/abs/2605.02815v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02815v1</guid><description>Quang Hieu Pham et al. — arxiv:2605.02815 — LLM Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Autonomous LLM Agent Worms: Cross-Platform Propagation, Automated Discovery and Temporal Re-Entry Defense</title><link>http://arxiv.org/abs/2605.02812v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02812v1</guid><description>Mingming Zha et al. — arxiv:2605.02812 — LLM Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Tool Use as Action: Towards Agentic Control in Mobile Core Networks</title><link>http://arxiv.org/abs/2605.02811v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02811v1</guid><description>Purna Sai Garigipati et al. — arxiv:2605.02811 — LLM Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Reinforcement Learning for LLM-based Multi-Agent Systems through Orchestration Traces</title><link>http://arxiv.org/abs/2605.02801v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02801v1</guid><description>Chenchen Zhang et al. — arxiv:2605.02801 — LLM Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Truthful Communication and Exclusive Information Clubs</title><link>http://arxiv.org/abs/2605.02776v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02776v1</guid><description>Paolo Pin et al. — arxiv:2605.02776 — LLM Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>DynoSLAM: Dynamic SLAM with Generative Graph Neural Networks for Real-World Social Navigation</title><link>http://arxiv.org/abs/2605.02759v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02759v1</guid><description>Danil Tokhchukov et al. — arxiv:2605.02759 — LLM Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>EvoPoC: Automated Exploit Synthesis for DeFi Smart Contracts via Hierarchical Knowledge Graphs</title><link>http://arxiv.org/abs/2605.02868v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02868v1</guid><description>Ruichao Liang et al. — arxiv:2605.02868 — Multi-Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Autonomous LLM Agent Worms: Cross-Platform Propagation, Automated Discovery and Temporal Re-Entry Defense</title><link>http://arxiv.org/abs/2605.02812v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02812v1</guid><description>Mingming Zha et al. — arxiv:2605.02812 — Multi-Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Reinforcement Learning for LLM-based Multi-Agent Systems through Orchestration Traces</title><link>http://arxiv.org/abs/2605.02801v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02801v1</guid><description>Chenchen Zhang et al. — arxiv:2605.02801 — Multi-Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Mitigating Misalignment Contagion by Steering with Implicit Traits</title><link>http://arxiv.org/abs/2605.02751v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02751v1</guid><description>Maria Chang et al. — arxiv:2605.02751 — Multi-Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>AI-Generated Smells: An Analysis of Code and Architecture in LLM and Agent-Driven Development</title><link>http://arxiv.org/abs/2605.02741v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02741v1</guid><description>Yuecai Zhu et al. — arxiv:2605.02741 — Multi-Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Hybrid Inspection and Task-Based Access Control in Zero-Trust Agentic AI</title><link>http://arxiv.org/abs/2605.02682v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02682v1</guid><description>Majed El Helou et al. — arxiv:2605.02682 — Multi-Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>AcademiClaw: When Students Set Challenges for AI Agents</title><link>http://arxiv.org/abs/2605.02661v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02661v1</guid><description>Junjie Yu et al. — arxiv:2605.02661 — Multi-Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Beyond State Machines: Executing Network Procedures with Agentic Tool-Calling Sequences</title><link>http://arxiv.org/abs/2605.02584v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02584v1</guid><description>Purna Sai Garigipati et al. — arxiv:2605.02584 — Multi-Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>IteRate: Autonomous AI Synthesis of In-Kernel eBPF Wi-Fi Rate Control Algorithms</title><link>http://arxiv.org/abs/2605.02542v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02542v1</guid><description>James Lynch et al. — arxiv:2605.02542 — Multi-Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>From Experimental Limits to Physical Insight: A Retrieval-Augmented Multi-Agent Framework for Interpreting Searches Beyond the Standard Model</title><link>http://arxiv.org/abs/2605.02491v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02491v1</guid><description>Altan Cakir et al. — arxiv:2605.02491 — Multi-Agent</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Benchmarking Retrieval Strategies for Biomedical Retrieval-Augmented Generation: A Controlled Empirical Study</title><link>http://arxiv.org/abs/2605.02520v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02520v1</guid><description>Devi Prasad Bal et al. — arxiv:2605.02520 — RAG</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Fight Poison with Poison: Enhancing Robustness in Few-shot Machine-Generated Text Detection with Adversarial Training</title><link>http://arxiv.org/abs/2605.02374v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02374v1</guid><description>Wenjing Duan et al. — arxiv:2605.02374 — RAG</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>ARGUS: Policy-Adaptive Ad Governance via Evolving Reinforcement with Adversarial Umpiring</title><link>http://arxiv.org/abs/2605.02200v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02200v1</guid><description>Deyi Ji et al. — arxiv:2605.02200 — RAG</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>DocSync: Agentic Documentation Maintenance via Critic-Guided Reflexion</title><link>http://arxiv.org/abs/2605.02163v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02163v1</guid><description>Sidhesh Badrinarayan et al. — arxiv:2605.02163 — RAG</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Semantic Risk-Aware Heuristic Planning for Robotic Navigation in Dynamic Environments: An LLM-Inspired Approach</title><link>http://arxiv.org/abs/2605.02862v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02862v1</guid><description>Hamza Ahmed Durrani et al. — arxiv:2605.02862 — Reasoning</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Bolek: A Multimodal Language Model for Molecular Reasoning</title><link>http://arxiv.org/abs/2605.02745v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02745v1</guid><description>Frederic Grabowski et al. — arxiv:2605.02745 — Reasoning</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Visual Latents Know More Than They Say: Unsilencing Latent Reasoning in MLLMs</title><link>http://arxiv.org/abs/2605.02735v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02735v1</guid><description>Xin Zhang et al. — arxiv:2605.02735 — Reasoning</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Accurate Legal Reasoning at Scale: Neuro-Symbolic Offloading and Structural Auditability for Robust Legal Adjudication</title><link>http://arxiv.org/abs/2605.02472v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02472v1</guid><description>Stanisław Sójka et al. — arxiv:2605.02472 — Reasoning</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Position: How can Graphs Help Large Language Models?</title><link>http://arxiv.org/abs/2605.02452v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02452v1</guid><description>Xiyuan Wang et al. — arxiv:2605.02452 — Reasoning</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Enhancing Multimodal In-Context Learning via Inductive-Deductive Reasoning</title><link>http://arxiv.org/abs/2605.02378v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02378v1</guid><description>Haoyu Wang et al. — arxiv:2605.02378 — Reasoning</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>SOTOPIA-TOM: Evaluating Information Management in Multi-Agent Interaction with Theory of Mind</title><link>http://arxiv.org/abs/2605.02307v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02307v1</guid><description>Yashwanth YS et al. — arxiv:2605.02307 — Reasoning</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Distilling Long-CoT Reasoning through Collaborative Step-wise Multi-Teacher Decoding</title><link>http://arxiv.org/abs/2605.02290v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02290v1</guid><description>Taewon Yun et al. — arxiv:2605.02290 — Reasoning</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Towards Understanding Specification Gaming in Reasoning Models</title><link>http://arxiv.org/abs/2605.02269v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02269v1</guid><description>Kei Nishimura-Gasparian et al. — arxiv:2605.02269 — Reasoning</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>ARGUS: Policy-Adaptive Ad Governance via Evolving Reinforcement with Adversarial Umpiring</title><link>http://arxiv.org/abs/2605.02200v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02200v1</guid><description>Deyi Ji et al. — arxiv:2605.02200 — Reasoning</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>MolmoAct2: Action Reasoning Models for Real-world Deployment</title><link>http://arxiv.org/abs/2605.02881v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02881v1</guid><description>Haoquan Fang et al. — arxiv:2605.02881 — Reasoning</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Tool Use as Action: Towards Agentic Control in Mobile Core Networks</title><link>http://arxiv.org/abs/2605.02811v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02811v1</guid><description>Purna Sai Garigipati et al. — arxiv:2605.02811 — Tool Use</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Reinforcement Learning for LLM-based Multi-Agent Systems through Orchestration Traces</title><link>http://arxiv.org/abs/2605.02801v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02801v1</guid><description>Chenchen Zhang et al. — arxiv:2605.02801 — Tool Use</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>The Design and Composition of Structural Causal Decision Processes</title><link>http://arxiv.org/abs/2605.02681v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02681v1</guid><description>Sebastian Benthall et al. — arxiv:2605.02681 — Tool Use</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>HeavySkill: Heavy Thinking as the Inner Skill in Agentic Harness</title><link>http://arxiv.org/abs/2605.02396v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02396v1</guid><description>Jianing Wang et al. — arxiv:2605.02396 — Tool Use</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>How to benchmark: the Measure-Explain-Test-Improve loop</title><link>http://arxiv.org/abs/2605.02233v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02233v1</guid><description>Gabriel Scherer et al. — arxiv:2605.02233 — Tool Use</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Planner Matters! An Efficient and Unbalanced Multi-agent Collaboration Framework for Long-horizon Planning</title><link>http://arxiv.org/abs/2605.02168v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02168v1</guid><description>Wenyi Wu et al. — arxiv:2605.02168 — Tool Use</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>From Knowledge to Action: Outcomes of the 2025 Large Language Model (LLM) Hackathon for Applications in Materials Science and Chemistry</title><link>http://arxiv.org/abs/2605.03205v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03205v1</guid><description>Aritra Roy et al. — arxiv:2605.03205 — Tool Use</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>VideoNet: A Large-Scale Dataset for Domain-Specific Action Recognition</title><link>http://arxiv.org/abs/2605.02834v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02834v1</guid><description>Tanush Yadav et al. — arxiv:2605.02834 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Latent Bridge: Feature Delta Prediction for Efficient Dual-System Vision-Language-Action Model Inference</title><link>http://arxiv.org/abs/2605.02739v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02739v1</guid><description>Yudong Liu et al. — arxiv:2605.02739 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Visual Latents Know More Than They Say: Unsilencing Latent Reasoning in MLLMs</title><link>http://arxiv.org/abs/2605.02735v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02735v1</guid><description>Xin Zhang et al. — arxiv:2605.02735 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Perceptual Flow Network for Visually Grounded Reasoning</title><link>http://arxiv.org/abs/2605.02730v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02730v1</guid><description>Yangfu Li et al. — arxiv:2605.02730 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>PubMed-Ophtha: An open resource for training ophthalmology vision-language models on scientific literature</title><link>http://arxiv.org/abs/2605.02720v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02720v1</guid><description>Verena Jasmin Hallitschke et al. — arxiv:2605.02720 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>AutoFocus: Uncertainty-Aware Active Visual Search for GUI Grounding</title><link>http://arxiv.org/abs/2605.02630v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02630v1</guid><description>Ruilin Yao et al. — arxiv:2605.02630 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Retrieving Any Relevant Moments: Benchmark and Models for Generalized Moment Retrieval</title><link>http://arxiv.org/abs/2605.02623v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02623v1</guid><description>Yiming Ding et al. — arxiv:2605.02623 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Rethinking the Need for Source Models: Source-Free Domain Adaptation from Scratch Guided by a Vision-Language Model</title><link>http://arxiv.org/abs/2605.02604v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02604v1</guid><description>Zhou Bingtao et al. — arxiv:2605.02604 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>CoRAL: Contact-Rich Adaptive LLM-based Control for Robotic Manipulation</title><link>http://arxiv.org/abs/2605.02600v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02600v1</guid><description>Berk Çiçek et al. — arxiv:2605.02600 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>A Semantic Autonomy Framework for VLM-Integrated Indoor Mobile Robots: Hybrid Deterministic Reasoning and Cross-Robot Adaptive Memory</title><link>http://arxiv.org/abs/2605.02525v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02525v1</guid><description>Bogdan Felician Abaza et al. — arxiv:2605.02525 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>AlbumFill: Album-Guided Reasoning and Retrieval for Personalized Image Completion</title><link>http://arxiv.org/abs/2605.02892v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02892v1</guid><description>Yu-Ju Tsai et al. — arxiv:2605.02892 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>MolmoAct2: Action Reasoning Models for Real-world Deployment</title><link>http://arxiv.org/abs/2605.02881v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02881v1</guid><description>Haoquan Fang et al. — arxiv:2605.02881 — Multimodal LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Autonomous LLM Agent Worms: Cross-Platform Propagation, Automated Discovery and Temporal Re-Entry Defense</title><link>http://arxiv.org/abs/2605.02812v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02812v1</guid><description>Mingming Zha et al. — arxiv:2605.02812 — Long Context</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Triple Spectral Fusion for Sensor-based Human Activity Recognition</title><link>http://arxiv.org/abs/2605.02743v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02743v1</guid><description>Ye Zhang et al. — arxiv:2605.02743 — Long Context</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>MSMixer: Learned Multi-Scale Temporal Mixing with Complementary Linear Shortcut for Long-Term Time Series Forecasting</title><link>http://arxiv.org/abs/2605.02689v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02689v1</guid><description>Ahmed Cherif et al. — arxiv:2605.02689 — Long Context</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>The 2026 ACII Dyadic Conversations (DaiKon) Workshop &amp; Challenge</title><link>http://arxiv.org/abs/2605.02672v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02672v1</guid><description>Panagiotis Tzirakis et al. — arxiv:2605.02672 — Long Context</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>M\textsuperscript{4}Fuse: Lightweight State-Space MoE with a Cross-Scale Gating Bridge for Brain Tumor Segmentation</title><link>http://arxiv.org/abs/2605.02444v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02444v1</guid><description>Meihua Zhou et al. — arxiv:2605.02444 — Long Context</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>The Conversations Beneath the Code: Triadic Data for Long-Horizon Software Engineering Agents</title><link>http://arxiv.org/abs/2605.02244v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02244v1</guid><description>Yelin Kim et al. — arxiv:2605.02244 — Long Context</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Retrieval and Multi-Hop Reasoning in 1M-Token Context Windows: Evaluating LLMs on Classical Chinese Text</title><link>http://arxiv.org/abs/2605.02173v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02173v1</guid><description>Eric H. C. Chow et al. — arxiv:2605.02173 — Long Context</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Standing on the Shoulders of Giants: Stabilized Knowledge Distillation for Cross--Language Code Clone Detection</title><link>http://arxiv.org/abs/2605.02860v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02860v1</guid><description>Mohamad Khajezade et al. — arxiv:2605.02860 — LLM Efficiency</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Trust, but Verify: Peeling Low-Bit Transformer Networks for Training Monitoring</title><link>http://arxiv.org/abs/2605.02853v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02853v1</guid><description>Arian Eamaz et al. — arxiv:2605.02853 — LLM Efficiency</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Compress Then Adapt? No, Do It Together via Task-aware Union of Subspaces</title><link>http://arxiv.org/abs/2605.02829v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02829v1</guid><description>Jingze Ge et al. — arxiv:2605.02829 — LLM Efficiency</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>When Audio-Language Models Fail to Leverage Multimodal Context for Dysarthric Speech Recognition</title><link>http://arxiv.org/abs/2605.02782v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02782v1</guid><description>Pehuén Moure et al. — arxiv:2605.02782 — LLM Efficiency</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Probing the Valley-Selective Tunneling Density of States in Monolayer MoS2 based Resonant Tunneling Devices</title><link>http://arxiv.org/abs/2605.02646v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02646v1</guid><description>Abir Mukherjee et al. — arxiv:2605.02646 — LLM Efficiency</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>VertMark: A Unified Training-Free Robust Watermarking Framework for Vertical Domain Pre-trained Language Models</title><link>http://arxiv.org/abs/2605.02557v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02557v1</guid><description>Cong Kong et al. — arxiv:2605.02557 — LLM Efficiency</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Reduced-Feedback Hybrid Precoding for Wideband mmWave MIMO-OFDM Systems</title><link>http://arxiv.org/abs/2605.02418v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02418v1</guid><description>Po-Heng Chou et al. — arxiv:2605.02418 — LLM Efficiency</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Statistically-Lossless Quantization of Large Language Models</title><link>http://arxiv.org/abs/2605.02404v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02404v1</guid><description>Michael Helcig et al. — arxiv:2605.02404 — LLM Efficiency</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Description and error analysis of quantum alghorithms in the projection evolution model -- the Deutsch algorithm case</title><link>http://arxiv.org/abs/2605.02293v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02293v1</guid><description>Krzysztof Lider et al. — arxiv:2605.02293 — LLM Efficiency</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>EdgeLPR: On the Deep Neural Network trade-off between Precision and Performance in LiDAR Place Recognition</title><link>http://arxiv.org/abs/2605.02275v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02275v1</guid><description>Pierpaolo Serio et al. — arxiv:2605.02275 — LLM Efficiency</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Gradient-Gated DPO: Stabilizing Preference Optimization in Language Models</title><link>http://arxiv.org/abs/2605.02626v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02626v1</guid><description>Inoussa Mouiche et al. — arxiv:2605.02626 — Alignment</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>A Semantic Autonomy Framework for VLM-Integrated Indoor Mobile Robots: Hybrid Deterministic Reasoning and Cross-Robot Adaptive Memory</title><link>http://arxiv.org/abs/2605.02525v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02525v1</guid><description>Bogdan Felician Abaza et al. — arxiv:2605.02525 — Alignment</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Efficient Preference Poisoning Attack on Offline RLHF</title><link>http://arxiv.org/abs/2605.02495v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02495v1</guid><description>Chenye Yang et al. — arxiv:2605.02495 — Alignment</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Anomaly-Preference Image Generation</title><link>http://arxiv.org/abs/2605.02439v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02439v1</guid><description>Fuyun Wang et al. — arxiv:2605.02439 — Alignment</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>&quot;I Don&apos;t Have Faith in the Developers to Use My Feedback&quot;: Understanding Player Values and Expectancy for Reporting Systems in Video Games</title><link>http://arxiv.org/abs/2605.02842v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02842v1</guid><description>Michael Yin et al. — arxiv:2605.02842 — Hallucination</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>The classification of almost periodic flows on the hyperfinite type ${\rm II_1}$ factor</title><link>http://arxiv.org/abs/2605.02781v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02781v1</guid><description>Cyril Houdayer et al. — arxiv:2605.02781 — Hallucination</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Perceptual Flow Network for Visually Grounded Reasoning</title><link>http://arxiv.org/abs/2605.02730v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02730v1</guid><description>Yangfu Li et al. — arxiv:2605.02730 — Hallucination</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Foundation-Model-Based Agents in Industrial Automation: Purposes, Capabilities, and Open Challenges</title><link>http://arxiv.org/abs/2605.02592v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02592v1</guid><description>Vincent Henkel et al. — arxiv:2605.02592 — Hallucination</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Benchmarking Retrieval Strategies for Biomedical Retrieval-Augmented Generation: A Controlled Empirical Study</title><link>http://arxiv.org/abs/2605.02520v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02520v1</guid><description>Devi Prasad Bal et al. — arxiv:2605.02520 — Hallucination</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>A multilingual hallucination benchmark: MultiWikiQHalluA</title><link>http://arxiv.org/abs/2605.02504v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02504v1</guid><description>Freja Thoresen et al. — arxiv:2605.02504 — Hallucination</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>ExpoCM: Exposure-Aware One-Step Generative Single-Image HDR Reconstruction</title><link>http://arxiv.org/abs/2605.02464v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02464v1</guid><description>Aoyu Liu et al. — arxiv:2605.02464 — Hallucination</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Position: How can Graphs Help Large Language Models?</title><link>http://arxiv.org/abs/2605.02452v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02452v1</guid><description>Xiyuan Wang et al. — arxiv:2605.02452 — Hallucination</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>HalluScan: A Systematic Benchmark for Detecting and Mitigating Hallucinations in Instruction-Following LLMs</title><link>http://arxiv.org/abs/2605.02443v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02443v1</guid><description>Ahmed Cherif et al. — arxiv:2605.02443 — Hallucination</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Measuring AI Reasoning: A Guide for Researchers</title><link>http://arxiv.org/abs/2605.02442v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02442v1</guid><description>Munachiso Samuel Nwadike et al. — arxiv:2605.02442 — Hallucination</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>ContextualJailbreak: Evolutionary Red-Teaming via Simulated Conversational Priming</title><link>http://arxiv.org/abs/2605.02647v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02647v1</guid><description>Mario Rodríguez Béjar et al. — arxiv:2605.02647 — LLM Safety</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Self-Mined Hardness for Safety Fine-Tuning</title><link>http://arxiv.org/abs/2605.03226v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03226v1</guid><description>Prakhar Gupta et al. — arxiv:2605.03226 — LLM Safety</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Revisiting JBShield: Breaking and Rebuilding Representation-Level Jailbreak Defenses</title><link>http://arxiv.org/abs/2605.03095v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03095v1</guid><description>Kemal Derya et al. — arxiv:2605.03095 — LLM Safety</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Neuron-Anchored Rule Extraction for Large Language Models via Contrastive Hierarchical Ablation</title><link>http://arxiv.org/abs/2605.03058v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03058v1</guid><description>Francesco Sovrano et al. — arxiv:2605.03058 — LLM Safety</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>U-Define: Designing User Workflows for Hard and Soft Constraints in LLM-Based Planning</title><link>http://arxiv.org/abs/2605.02765v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02765v1</guid><description>Christine P Lee et al. — arxiv:2605.02765 — LLM Evaluation</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>ContextualJailbreak: Evolutionary Red-Teaming via Simulated Conversational Priming</title><link>http://arxiv.org/abs/2605.02647v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02647v1</guid><description>Mario Rodríguez Béjar et al. — arxiv:2605.02647 — LLM Evaluation</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>When Stress Becomes Signal: Detecting Antifragility-Compatible Regimes in Multi-Agent LLM Systems</title><link>http://arxiv.org/abs/2605.02463v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02463v1</guid><description>Jose Manuel de la Chica et al. — arxiv:2605.02463 — LLM Evaluation</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Decoding-Time Debiasing via Process Reward Models: From Controlled Fill-in to Open-Ended Generation</title><link>http://arxiv.org/abs/2605.02348v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02348v1</guid><description>Muneeb Ur Raheem Khan et al. — arxiv:2605.02348 — LLM Evaluation</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>DocSync: Agentic Documentation Maintenance via Critic-Guided Reflexion</title><link>http://arxiv.org/abs/2605.02163v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02163v1</guid><description>Sidhesh Badrinarayan et al. — arxiv:2605.02163 — LLM Evaluation</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Terminus-4B: Can a Smaller Model Replace Frontier LLMs at Agentic Execution Tasks?</title><link>http://arxiv.org/abs/2605.03195v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03195v1</guid><description>Spandan Garg et al. — arxiv:2605.03195 — LLM Evaluation</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>PIIGuard: Mitigating PII Harvesting under Adversarial Sanitization</title><link>http://arxiv.org/abs/2605.03129v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03129v1</guid><description>Mingshuo Liu et al. — arxiv:2605.03129 — LLM Evaluation</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>EvoPoC: Automated Exploit Synthesis for DeFi Smart Contracts via Hierarchical Knowledge Graphs</title><link>http://arxiv.org/abs/2605.02868v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02868v1</guid><description>Ruichao Liang et al. — arxiv:2605.02868 — Code LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>AI-Generated Smells: An Analysis of Code and Architecture in LLM and Agent-Driven Development</title><link>http://arxiv.org/abs/2605.02741v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02741v1</guid><description>Yuecai Zhu et al. — arxiv:2605.02741 — Code LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>LLM-Assisted Repository-Level Generation with Structured Spec-Driven Engineering</title><link>http://arxiv.org/abs/2605.02455v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02455v1</guid><description>Shuzhao Feng et al. — arxiv:2605.02455 — Code LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>ARIADNE: Agentic Reward-Informed Adaptive Decision Exploration via Blackboard-Driven MCTS for Competitive Program Generation</title><link>http://arxiv.org/abs/2605.02431v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02431v1</guid><description>Minnan Wei et al. — arxiv:2605.02431 — Code LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>MolViBench: Evaluating LLMs on Molecular Vibe Coding</title><link>http://arxiv.org/abs/2605.02351v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02351v1</guid><description>Jiatong Li et al. — arxiv:2605.02351 — Code LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>EngiAgent: Fully Connected Coordination of LLM Agents for Solving Open-ended Engineering Problems with Feasible Solutions</title><link>http://arxiv.org/abs/2605.02289v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02289v1</guid><description>Xiyuan Zhou et al. — arxiv:2605.02289 — Code LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Exact Higher-Order Derivatives for SE(3) via Analytical/AD Methods</title><link>http://arxiv.org/abs/2605.02252v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02252v1</guid><description>Frank O. Kuehnel et al. — arxiv:2605.02252 — Code LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>A Validated Prompt Bank for Malicious Code Generation: Separating Executable Weapons from Security Knowledge in 1,554 Consensus-Labeled Prompts</title><link>http://arxiv.org/abs/2605.03179v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03179v1</guid><description>Richard J. Young et al. — arxiv:2605.03179 — Code LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Learning Correct Behavior from Examples: Validating Sequential Execution in Autonomous Agents</title><link>http://arxiv.org/abs/2605.03159v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03159v1</guid><description>Reshabh K Sharma et al. — arxiv:2605.03159 — Code LLM</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Accurate Legal Reasoning at Scale: Neuro-Symbolic Offloading and Structural Auditability for Robust Legal Adjudication</title><link>http://arxiv.org/abs/2605.02472v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02472v1</guid><description>Stanisław Sójka et al. — arxiv:2605.02472 — Legal NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Legal NLP</category></item><item><title>Structural Dilemmas and Developmental Pathways of Legal Argument Mining in the Era of Artificial Intelligence</title><link>http://arxiv.org/abs/2605.02308v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02308v1</guid><description>Xianglei Liao et al. — arxiv:2605.02308 — Legal NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Legal NLP</category></item><item><title>Dependency Parsing Across the Resource Spectrum: Evaluating Architectures on High and Low-Resource Languages</title><link>http://arxiv.org/abs/2605.02608v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02608v1</guid><description>Kevin Guan et al. — arxiv:2605.02608 — Multilingual NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>SemEval-2026 Task 7: Everyday Knowledge Across Diverse Languages and Cultures</title><link>http://arxiv.org/abs/2605.02601v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02601v1</guid><description>Nedjma Ousidhoum et al. — arxiv:2605.02601 — Multilingual NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Tibetan-TTS:Low-Resource Tibetan Speech Synthesis with Large Model Adaptation</title><link>http://arxiv.org/abs/2605.02496v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02496v1</guid><description>Jiaxu He et al. — arxiv:2605.02496 — Multilingual NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Reliability-Oriented Multilingual Orthopedic Diagnosis: A Domain-Adaptive Modeling and a Conceptual Validation Framework</title><link>http://arxiv.org/abs/2605.02266v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02266v1</guid><description>Danish Ali et al. — arxiv:2605.02266 — Multilingual NLP</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Adaptive Gait Generation for Multi-Terrain Exoskeletons via Constrained Kernelized Movement Primitives</title><link>http://arxiv.org/abs/2605.02513v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02513v1</guid><description>Edoardo Trombin et al. — arxiv:2605.02513 — Information Extraction</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>SCRIBE: Practical Static Binary Patching via Binary-Aware Recompilation of Decompiled Code</title><link>http://arxiv.org/abs/2605.02121v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02121v1</guid><description>Han Dai et al. — arxiv:2605.02121 — Information Extraction</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>MedStruct-S: A Benchmark for Key Discovery, Key-Conditioned QA and Semi-Structured Extraction from OCR Clinical Reports</title><link>http://arxiv.org/abs/2605.03103v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03103v1</guid><description>Yingyun Li et al. — arxiv:2605.03103 — Information Extraction</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>mdok-style at SemEval-2026 Task 10: Finetuning LLMs for Conspiracy Detection</title><link>http://arxiv.org/abs/2605.02712v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02712v1</guid><description>Dominik Macko et al. — arxiv:2605.02712 — Text Classification</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>VideoNet: A Large-Scale Dataset for Domain-Specific Action Recognition</title><link>http://arxiv.org/abs/2605.02834v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02834v1</guid><description>Tanush Yadav et al. — arxiv:2605.02834 — Question Answering</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Compress Then Adapt? No, Do It Together via Task-aware Union of Subspaces</title><link>http://arxiv.org/abs/2605.02829v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02829v1</guid><description>Jingze Ge et al. — arxiv:2605.02829 — Question Answering</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>SCPRM: A Schema-aware Cumulative Process Reward Model for Knowledge Graph Question Answering</title><link>http://arxiv.org/abs/2605.02819v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02819v1</guid><description>Jiujiu Chen et al. — arxiv:2605.02819 — Question Answering</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Benchmarking Retrieval Strategies for Biomedical Retrieval-Augmented Generation: A Controlled Empirical Study</title><link>http://arxiv.org/abs/2605.02520v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02520v1</guid><description>Devi Prasad Bal et al. — arxiv:2605.02520 — Question Answering</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>SkillCom: Decomposing LLM-based Semantic Communication into Task and Channel Aware Skills</title><link>http://arxiv.org/abs/2605.02333v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02333v1</guid><description>Jingwen Fu et al. — arxiv:2605.02333 — Question Answering</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>CBV: Clean-label Backdoor Attacks on Vision Language Models via Diffusion Models</title><link>http://arxiv.org/abs/2605.02202v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02202v1</guid><description>Ji Guo et al. — arxiv:2605.02202 — Question Answering</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>MEMAUDIT: An Exact Package-Oracle Evaluation Protocol for Budgeted Long-Term LLM Memory Writing</title><link>http://arxiv.org/abs/2605.02199v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02199v1</guid><description>Nishant Bhargava et al. — arxiv:2605.02199 — Question Answering</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>T$^2$PO: Uncertainty-Guided Exploration Control for Stable Multi-Turn Agentic Reinforcement Learning</title><link>http://arxiv.org/abs/2605.02178v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02178v1</guid><description>Haixin Wang et al. — arxiv:2605.02178 — Question Answering</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>CLaC at SemEval-2026 Task 6: Response Clarity Detection in Political Discourse</title><link>http://arxiv.org/abs/2605.02170v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02170v1</guid><description>Nawar Turk et al. — arxiv:2605.02170 — Question Answering</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Semantically Enriching Investor Micro-blogs for Opinion-Aware Emotion Analysis: A Practical Approach</title><link>http://arxiv.org/abs/2605.03092v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.03092v1</guid><description>Gaurav Negi et al. — arxiv:2605.03092 — Sentiment Analysis</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>EvoPoC: Automated Exploit Synthesis for DeFi Smart Contracts via Hierarchical Knowledge Graphs</title><link>http://arxiv.org/abs/2605.02868v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02868v1</guid><description>Ruichao Liang et al. — arxiv:2605.02868 — Knowledge Graph</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>SCPRM: A Schema-aware Cumulative Process Reward Model for Knowledge Graph Question Answering</title><link>http://arxiv.org/abs/2605.02819v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02819v1</guid><description>Jiujiu Chen et al. — arxiv:2605.02819 — Knowledge Graph</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Fine-Grained Graph Generation through Latent Mixture Scheduling</title><link>http://arxiv.org/abs/2605.02780v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02780v1</guid><description>Nidhi Vakil et al. — arxiv:2605.02780 — Knowledge Graph</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Position: How can Graphs Help Large Language Models?</title><link>http://arxiv.org/abs/2605.02452v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02452v1</guid><description>Xiyuan Wang et al. — arxiv:2605.02452 — Knowledge Graph</description><pubDate>Mon, 04 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>EditPropBench: Measuring Factual Edit Propagation in Scientific Manuscripts</title><link>http://arxiv.org/abs/2605.02083v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02083v1</guid><description>Garvin Kruthof et al. — arxiv:2605.02083 — NLP</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Maistros: A Greek Large Language Model Adapted Through Knowledge Distillation From Large Reasoning Models</title><link>http://arxiv.org/abs/2605.01870v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01870v1</guid><description>Nikolaos Giarelis et al. — arxiv:2605.01870 — NLP</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>The Cylindrical Representation Hypothesis for Language Model Steering</title><link>http://arxiv.org/abs/2605.01844v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01844v1</guid><description>Lang Gao et al. — arxiv:2605.01844 — NLP</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Enhancing Judgment Document Generation via Agentic Legal Information Collection and Rubric-Guided Optimization</title><link>http://arxiv.org/abs/2605.02011v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02011v1</guid><description>Weihang Su et al. — arxiv:2605.02011 — RAG</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Trojan Hippo: Weaponizing Agent Memory for Data Exfiltration</title><link>http://arxiv.org/abs/2605.01970v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01970v1</guid><description>Debeshee Das et al. — arxiv:2605.01970 — RAG</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Needle-in-RAG: Prompt-Conditioned Character-Level Traceback of Poisoned Spans in Retrieved Evidence</title><link>http://arxiv.org/abs/2605.01782v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01782v1</guid><description>Huining Cui et al. — arxiv:2605.01782 — RAG</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>TrajRAG: Retrieving Geometric-Semantic Experience for Zero-Shot Object Navigation</title><link>http://arxiv.org/abs/2605.01700v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01700v1</guid><description>Yiyao Wang et al. — arxiv:2605.01700 — RAG</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>A Hybrid Retrieval and Reranking Framework for Evidence-Grounded Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.01664v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01664v1</guid><description>Fariba Afrin Irany et al. — arxiv:2605.01664 — RAG</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Trojan Hippo: Weaponizing Agent Memory for Data Exfiltration</title><link>http://arxiv.org/abs/2605.01970v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01970v1</guid><description>Debeshee Das et al. — arxiv:2605.01970 — Long Context</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Stochastic Sparse Attention for Memory-Bound Inference</title><link>http://arxiv.org/abs/2605.01910v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01910v1</guid><description>Kyle Lee et al. — arxiv:2605.01910 — Long Context</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Long Sync Word Frame Synchronization for Future Wireless Networks</title><link>http://arxiv.org/abs/2605.01890v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01890v1</guid><description>Dimitris Nikolaidis et al. — arxiv:2605.01890 — Long Context</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>12 Angry AI Agents: Evaluating Multi-Agent LLM Decision-Making Through Cinematic Jury Deliberation</title><link>http://arxiv.org/abs/2605.01986v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01986v1</guid><description>Ahmet Bahaddin Ersoz et al. — arxiv:2605.01986 — Alignment</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>The Compliance Gap: Why AI Systems Promise to Follow Process Instructions but Don&apos;t</title><link>http://arxiv.org/abs/2605.01771v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01771v1</guid><description>Kwan Soo Shin et al. — arxiv:2605.01771 — Alignment</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Beyond Perplexity: Character Distribution Signatures and the MDTA Benchmark for AI Text Detection</title><link>http://arxiv.org/abs/2605.01647v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01647v1</guid><description>Priyadarshan Narayanasamy et al. — arxiv:2605.01647 — Alignment</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Trojan Hippo: Weaponizing Agent Memory for Data Exfiltration</title><link>http://arxiv.org/abs/2605.01970v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01970v1</guid><description>Debeshee Das et al. — arxiv:2605.01970 — LLM Safety</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Disentangling Intent from Role: Adversarial Self-Play for Persona-Invariant Safety Alignment</title><link>http://arxiv.org/abs/2605.01899v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01899v1</guid><description>Jiajia Li et al. — arxiv:2605.01899 — LLM Safety</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>TrajShield: Trajectory-Level Safety Mediation for Defending Text-to-Video Models Against Jailbreak Attacks</title><link>http://arxiv.org/abs/2605.01761v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01761v1</guid><description>Quanchen Zou et al. — arxiv:2605.01761 — LLM Safety</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Catching the Infection Before It Spreads: Foresight-Guided Defense in Multi-Agent Systems</title><link>http://arxiv.org/abs/2605.01758v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01758v1</guid><description>Yue Ma et al. — arxiv:2605.01758 — LLM Safety</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>MultiBreak: A Scalable and Diverse Multi-turn Jailbreak Benchmark for Evaluating LLM Safety</title><link>http://arxiv.org/abs/2605.01687v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01687v1</guid><description>Jialin Song et al. — arxiv:2605.01687 — LLM Safety</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>A Multimodal Dataset for Visually Grounded Ambiguity in Machine Translation</title><link>http://arxiv.org/abs/2605.02035v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02035v1</guid><description>Jingheng Pan et al. — arxiv:2605.02035 — LLM Evaluation</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Enhancing Judgment Document Generation via Agentic Legal Information Collection and Rubric-Guided Optimization</title><link>http://arxiv.org/abs/2605.02011v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02011v1</guid><description>Weihang Su et al. — arxiv:2605.02011 — LLM Evaluation</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>12 Angry AI Agents: Evaluating Multi-Agent LLM Decision-Making Through Cinematic Jury Deliberation</title><link>http://arxiv.org/abs/2605.01986v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01986v1</guid><description>Ahmet Bahaddin Ersoz et al. — arxiv:2605.01986 — LLM Evaluation</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>SurgCheck: Do Vision-Language Models Really Look at Images in Surgical VQA?</title><link>http://arxiv.org/abs/2605.01911v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01911v1</guid><description>Jongmin Shin et al. — arxiv:2605.01911 — LLM Evaluation</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Maistros: A Greek Large Language Model Adapted Through Knowledge Distillation From Large Reasoning Models</title><link>http://arxiv.org/abs/2605.01870v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01870v1</guid><description>Nikolaos Giarelis et al. — arxiv:2605.01870 — LLM Evaluation</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>VulKey: Automated Vulnerability Repair Guided by Domain-Specific Repair Patterns</title><link>http://arxiv.org/abs/2605.01769v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01769v1</guid><description>Jia Li et al. — arxiv:2605.01769 — Code LLM</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>A Hybrid Retrieval and Reranking Framework for Evidence-Grounded Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.01664v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01664v1</guid><description>Fariba Afrin Irany et al. — arxiv:2605.01664 — Medical NLP</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Medical NLP</category></item><item><title>Multilingual Safety Alignment via Self-Distillation</title><link>http://arxiv.org/abs/2605.02971v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02971v1</guid><description>Ruiyang Qin et al. — arxiv:2605.02971 — Multilingual NLP</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>TIJERE: A Novel Threat Intelligence Joint Extraction Model Based on Analyst Expert Knowledge</title><link>http://arxiv.org/abs/2605.02041v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02041v1</guid><description>Inoussa Mouiche et al. — arxiv:2605.02041 — Named Entity Recognition</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Named Entity Recognition</category></item><item><title>BIM Information Extraction Through LLM-based Adaptive Exploration</title><link>http://arxiv.org/abs/2605.01698v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01698v1</guid><description>Sylvain Hellin et al. — arxiv:2605.01698 — Information Extraction</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>Flexi-LoRA with Input-Adaptive Ranks: Efficient Finetuning for Speech and Reasoning Tasks</title><link>http://arxiv.org/abs/2605.01959v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01959v1</guid><description>Zongqian Li et al. — arxiv:2605.01959 — Question Answering</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>TIJERE: A Novel Threat Intelligence Joint Extraction Model Based on Analyst Expert Knowledge</title><link>http://arxiv.org/abs/2605.02041v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.02041v1</guid><description>Inoussa Mouiche et al. — arxiv:2605.02041 — Knowledge Graph</description><pubDate>Sun, 03 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Led to Mislead: Adversarial Content Injection for Attacks on Neural Ranking Models</title><link>http://arxiv.org/abs/2605.01591v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01591v1</guid><description>Amin Bigdeli et al. — arxiv:2605.01591 — RAG</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Strong light-matter interactions in hybrid polaritonic systems</title><link>http://arxiv.org/abs/2605.01583v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01583v1</guid><description>Ben Johns et al. — arxiv:2605.01583 — Tool Use</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>EO-Gym: A Multimodal, Interactive Environment for Earth Observation Agents</title><link>http://arxiv.org/abs/2605.01250v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01250v1</guid><description>Sai Ma et al. — arxiv:2605.01250 — Tool Use</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>S^3-R1: Learning to Retrieve and Answer Step-by-Step with Synthetic Data</title><link>http://arxiv.org/abs/2605.01248v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01248v1</guid><description>Harsh Goel et al. — arxiv:2605.01248 — Tool Use</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>A Theory of Generalization in Deep Learning</title><link>http://arxiv.org/abs/2605.01172v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01172v1</guid><description>Elon Litman et al. — arxiv:2605.01172 — Alignment</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>VisInject: Disruption != Injection -- A Dual-Dimension Evaluation of Universal Adversarial Attacks on Vision-Language Models</title><link>http://arxiv.org/abs/2605.01449v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01449v1</guid><description>Pang Liu et al. — arxiv:2605.01449 — LLM Safety</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Asymmetric Invertible Threat: Learning Reversible Privacy Defense for Face Recognition</title><link>http://arxiv.org/abs/2605.01217v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01217v1</guid><description>Jiabei Zhang et al. — arxiv:2605.01217 — LLM Safety</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Using LLMs in Software Design: An Empirical Study of GitHub and A Practitioner Survey</title><link>http://arxiv.org/abs/2605.01392v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01392v1</guid><description>Yifei Wang et al. — arxiv:2605.01392 — Code LLM</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>MAD-OPD: Breaking the Ceiling in On-Policy Distillation via Multi-Agent Debate</title><link>http://arxiv.org/abs/2605.01347v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01347v1</guid><description>Jianze Wang et al. — arxiv:2605.01347 — Code LLM</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Toward Fair Speech Technologies: A Comprehensive Survey of Bias and Fairness in Speech AI</title><link>http://arxiv.org/abs/2605.01597v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01597v1</guid><description>Yi-Cheng Lin et al. — arxiv:2605.01597 — Speech LLM</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Speech LLM</category></item><item><title>Concepts Whisper While Syntax Shouts: Spectral Anti-Concentration and the Dual Geometry of Transformer Representations</title><link>http://arxiv.org/abs/2605.01609v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01609v1</guid><description>Pratyush Acharya et al. — arxiv:2605.01609 — Multilingual NLP</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Auditing demographic bias in AI-based emergency police dispatch: a cross-lingual evaluation of eleven large language models</title><link>http://arxiv.org/abs/2605.01451v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01451v1</guid><description>William Guey et al. — arxiv:2605.01451 — Multilingual NLP</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Lost in the Tower of Babel: The Adverse Effects of Incidental Multilingualism in LLMs</title><link>http://arxiv.org/abs/2605.01224v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01224v1</guid><description>Anjishnu Mukherjee et al. — arxiv:2605.01224 — Multilingual NLP</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>Medmarks: A Comprehensive Open-Source LLM Benchmark Suite for Medical Tasks</title><link>http://arxiv.org/abs/2605.01417v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01417v1</guid><description>Benjamin Warner et al. — arxiv:2605.01417 — Information Extraction</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>Addressing Data Scarcity in Bangla Fake News Detection: An LLM-Based Dataset Augmentation Approach</title><link>http://arxiv.org/abs/2605.01292v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01292v1</guid><description>Ahmed Alfey Sani et al. — arxiv:2605.01292 — Text Classification</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>Arbitrarily Conditioned Hierarchical Flows for Spatiotemporal Events</title><link>http://arxiv.org/abs/2605.01226v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01226v1</guid><description>Keyan Chen et al. — arxiv:2605.01226 — Text Classification</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>Benchmarking LightGBM and BiLSTM for Sentiment Analysis on Indonesian E-Commerce Reviews</title><link>http://arxiv.org/abs/2605.01322v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01322v1</guid><description>Lidia Natasyah Marpaung et al. — arxiv:2605.01322 — Sentiment Analysis</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Sentiment Analysis of Mobile Legends App Reviews Using Machine Learning and LSTM-Based Deep Learning Models</title><link>http://arxiv.org/abs/2605.01317v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01317v1</guid><description>Vira Putri Maharani et al. — arxiv:2605.01317 — Sentiment Analysis</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>Enhancing Game Review Sentiment Classification on Steam Platform with Attention-Based BiLSTM</title><link>http://arxiv.org/abs/2605.01315v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01315v1</guid><description>Abit Ahmad Oktarian et al. — arxiv:2605.01315 — Sentiment Analysis</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>KG-First, LLM-Fallback: A Hybrid Microservice for Grounded Skill Search and Explanation</title><link>http://arxiv.org/abs/2605.01582v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01582v1</guid><description>Ngoc Luyen Le et al. — arxiv:2605.01582 — Knowledge Graph</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Actionable Understanding: Action Units for Bridging the Knowledge-Action Gap in Post-FAIR Knowledge Infrastructures</title><link>http://arxiv.org/abs/2605.01564v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01564v1</guid><description>Lars Vogt et al. — arxiv:2605.01564 — Knowledge Graph</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>SciResearcher: Scaling Deep Research Agents for Frontier Scientific Reasoning</title><link>http://arxiv.org/abs/2605.01489v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01489v1</guid><description>Tianshi Zheng et al. — arxiv:2605.01489 — Knowledge Graph</description><pubDate>Sat, 02 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>Directed Social Regard: Surfacing Targeted Advocacy, Opposition, Aid, Harms, and Victimization in Online Media</title><link>http://arxiv.org/abs/2605.00776v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00776v1</guid><description>Scott Friedman et al. — arxiv:2605.00776 — NLP</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>BWLA: Breaking the Barrier of W1AX Post-Training Quantization for LLMs</title><link>http://arxiv.org/abs/2605.00422v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00422v1</guid><description>Zhixiong Zhao et al. — arxiv:2605.00422 — NLP</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>When LLMs Stop Following Steps: A Diagnostic Study of Procedural Execution in Language Models</title><link>http://arxiv.org/abs/2605.00817v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00817v1</guid><description>Sailesh Panda et al. — arxiv:2605.00817 — LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Let ViT Speak: Generative Language-Image Pre-training</title><link>http://arxiv.org/abs/2605.00809v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00809v1</guid><description>Yan Fang et al. — arxiv:2605.00809 — LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Can Coding Agents Reproduce Findings in Computational Materials Science?</title><link>http://arxiv.org/abs/2605.00803v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00803v1</guid><description>Ziyang Huang et al. — arxiv:2605.00803 — LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Generating Statistical Charts with Validation-Driven LLM Workflows</title><link>http://arxiv.org/abs/2605.00800v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00800v1</guid><description>Pavlin G. Poličar et al. — arxiv:2605.00800 — LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>RunAgent: Interpreting Natural-Language Plans with Constraint-Guided Execution</title><link>http://arxiv.org/abs/2605.00798v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00798v1</guid><description>Arunabh Srivastava et al. — arxiv:2605.00798 — LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>When RAG Chatbots Expose Their Backend: An Anonymized Case Study of Privacy and Security Risks in Patient-Facing Medical AI</title><link>http://arxiv.org/abs/2605.00796v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00796v1</guid><description>Alfredo Madrid-García et al. — arxiv:2605.00796 — LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Make Your LVLM KV Cache More Lightweight</title><link>http://arxiv.org/abs/2605.00789v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00789v1</guid><description>Xihao Chen et al. — arxiv:2605.00789 — LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>GeoContra: From Fluent GIS Code to Verifiable Spatial Analysis with Geography-Grounded Repair</title><link>http://arxiv.org/abs/2605.00782v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00782v1</guid><description>Yinhao Xiao et al. — arxiv:2605.00782 — LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Position: agentic AI orchestration should be Bayes-consistent</title><link>http://arxiv.org/abs/2605.00742v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00742v1</guid><description>Theodore Papamarkou et al. — arxiv:2605.00742 — LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Self-Adaptive Multi-Agent LLM-Based Security Pattern Selection for IoT Systems</title><link>http://arxiv.org/abs/2605.00741v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00741v1</guid><description>Saeid Jamshidi et al. — arxiv:2605.00741 — LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM</category></item><item><title>Can Coding Agents Reproduce Findings in Computational Materials Science?</title><link>http://arxiv.org/abs/2605.00803v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00803v1</guid><description>Ziyang Huang et al. — arxiv:2605.00803 — LLM Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>RunAgent: Interpreting Natural-Language Plans with Constraint-Guided Execution</title><link>http://arxiv.org/abs/2605.00798v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00798v1</guid><description>Arunabh Srivastava et al. — arxiv:2605.00798 — LLM Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Simpson&apos;s paradox explains the ubiquity of nonlinear, threshold, and complex contagions</title><link>http://arxiv.org/abs/2605.00791v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00791v1</guid><description>Laurent Hébert-Dufresne et al. — arxiv:2605.00791 — LLM Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Penalized Likelihood for Dyadic Network Formation Models with Degree Heterogeneity</title><link>http://arxiv.org/abs/2605.00771v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00771v1</guid><description>Zizhong Yan et al. — arxiv:2605.00771 — LLM Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Meritocratic Fairness in Budgeted Combinatorial Multi-armed Bandits via Shapley Values</title><link>http://arxiv.org/abs/2605.00762v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00762v1</guid><description>Shradha Sharma et al. — arxiv:2605.00762 — LLM Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>NonZero: Interaction-Guided Exploration for Multi-Agent Monte Carlo Tree Search</title><link>http://arxiv.org/abs/2605.00751v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00751v1</guid><description>Sizhe Tang et al. — arxiv:2605.00751 — LLM Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Position: agentic AI orchestration should be Bayes-consistent</title><link>http://arxiv.org/abs/2605.00742v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00742v1</guid><description>Theodore Papamarkou et al. — arxiv:2605.00742 — LLM Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Self-Adaptive Multi-Agent LLM-Based Security Pattern Selection for IoT Systems</title><link>http://arxiv.org/abs/2605.00741v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00741v1</guid><description>Saeid Jamshidi et al. — arxiv:2605.00741 — LLM Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>To Call or Not to Call: A Framework to Assess and Optimize LLM Tool Calling</title><link>http://arxiv.org/abs/2605.00737v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00737v1</guid><description>Qinyuan Wu et al. — arxiv:2605.00737 — LLM Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>Decentralized Proximal Stochastic Gradient Langevin Dynamics</title><link>http://arxiv.org/abs/2605.00723v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00723v1</guid><description>Mohammad Rafiqul Islam et al. — arxiv:2605.00723 — LLM Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Agent</category></item><item><title>RunAgent: Interpreting Natural-Language Plans with Constraint-Guided Execution</title><link>http://arxiv.org/abs/2605.00798v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00798v1</guid><description>Arunabh Srivastava et al. — arxiv:2605.00798 — Multi-Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Meritocratic Fairness in Budgeted Combinatorial Multi-armed Bandits via Shapley Values</title><link>http://arxiv.org/abs/2605.00762v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00762v1</guid><description>Shradha Sharma et al. — arxiv:2605.00762 — Multi-Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>NonZero: Interaction-Guided Exploration for Multi-Agent Monte Carlo Tree Search</title><link>http://arxiv.org/abs/2605.00751v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00751v1</guid><description>Sizhe Tang et al. — arxiv:2605.00751 — Multi-Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Self-Adaptive Multi-Agent LLM-Based Security Pattern Selection for IoT Systems</title><link>http://arxiv.org/abs/2605.00741v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00741v1</guid><description>Saeid Jamshidi et al. — arxiv:2605.00741 — Multi-Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Learning How and What to Memorize: Cognition-Inspired Two-Stage Optimization for Evolving Memory</title><link>http://arxiv.org/abs/2605.00702v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00702v1</guid><description>Derong Xu et al. — arxiv:2605.00702 — Multi-Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Learning to Act and Cooperate for Distributed Black-Box Consensus Optimization</title><link>http://arxiv.org/abs/2605.00691v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00691v1</guid><description>Zi-Bo Qin et al. — arxiv:2605.00691 — Multi-Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>DySRec: Dynamic Context-Aware Psychometric Scale Recommendation via Multi-Agent Collaboration</title><link>http://arxiv.org/abs/2605.00574v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00574v1</guid><description>Yanzeng Li et al. — arxiv:2605.00574 — Multi-Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Hierarchical Abstract Tree for Cross-Document Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.00529v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00529v1</guid><description>Ziwen Zhao et al. — arxiv:2605.00529 — Multi-Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>SAGA: Workflow-Atomic Scheduling for AI Agent Inference on GPU Clusters</title><link>http://arxiv.org/abs/2605.00528v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00528v1</guid><description>Dongxin Guo et al. — arxiv:2605.00528 — Multi-Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>Scaling Video Understanding via Compact Latent Multi-Agent Collaboration</title><link>http://arxiv.org/abs/2605.00444v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00444v1</guid><description>Kerui Chen et al. — arxiv:2605.00444 — Multi-Agent</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multi-Agent</category></item><item><title>When RAG Chatbots Expose Their Backend: An Anonymized Case Study of Privacy and Security Risks in Patient-Facing Medical AI</title><link>http://arxiv.org/abs/2605.00796v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00796v1</guid><description>Alfredo Madrid-García et al. — arxiv:2605.00796 — RAG</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>BlenderRAG: High-Fidelity 3D Object Generation via Retrieval-Augmented Code Synthesis</title><link>http://arxiv.org/abs/2605.00632v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00632v1</guid><description>Massimo Rondelli et al. — arxiv:2605.00632 — RAG</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>H-RAG at SemEval-2026 Task 8: Hierarchical Parent-Child Retrieval for Multi-Turn RAG Conversations</title><link>http://arxiv.org/abs/2605.00631v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00631v1</guid><description>Passant Elchafei et al. — arxiv:2605.00631 — RAG</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Hierarchical Abstract Tree for Cross-Document Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.00529v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00529v1</guid><description>Ziwen Zhao et al. — arxiv:2605.00529 — RAG</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>LLM-Oriented Information Retrieval: A Denoising-First Perspective</title><link>http://arxiv.org/abs/2605.00505v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00505v1</guid><description>Lu Dai et al. — arxiv:2605.00505 — RAG</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>CleanBase: Detecting Malicious Documents in RAG Knowledge Databases</title><link>http://arxiv.org/abs/2605.00460v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00460v1</guid><description>Weifei Jin et al. — arxiv:2605.00460 — RAG</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Agentic AI for Substance Use Education: Integrating Regulatory and Scientific Knowledge Sources</title><link>http://arxiv.org/abs/2605.00383v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00383v1</guid><description>Kosar Haghani et al. — arxiv:2605.00383 — RAG</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Structure-Aware Chunking for Tabular Data in Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.00318v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00318v1</guid><description>Pooja Guttal et al. — arxiv:2605.00318 — RAG</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Evaluating the Architectural Reasoning Capabilities of LLM Provers via the Obfuscated Natural Number Game</title><link>http://arxiv.org/abs/2605.00677v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00677v1</guid><description>Lixing Li et al. — arxiv:2605.00677 — Reasoning</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Thinking in Text and Images: Interleaved Vision--Language Reasoning Traces for Long-Horizon Robot Manipulation</title><link>http://arxiv.org/abs/2605.00438v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00438v1</guid><description>Jinkun Liu et al. — arxiv:2605.00438 — Reasoning</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Social Bias in LLM-Generated Code: Benchmark and Mitigation</title><link>http://arxiv.org/abs/2605.00382v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00382v1</guid><description>Fazle Rabbi et al. — arxiv:2605.00382 — Reasoning</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>ResRL: Boosting LLM Reasoning via Negative Sample Projection Residual Reinforcement Learning</title><link>http://arxiv.org/abs/2605.00380v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00380v1</guid><description>Zihan Lin et al. — arxiv:2605.00380 — Reasoning</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>To Call or Not to Call: A Framework to Assess and Optimize LLM Tool Calling</title><link>http://arxiv.org/abs/2605.00737v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00737v1</guid><description>Qinyuan Wu et al. — arxiv:2605.00737 — Tool Use</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>ResRL: Boosting LLM Reasoning via Negative Sample Projection Residual Reinforcement Learning</title><link>http://arxiv.org/abs/2605.00380v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00380v1</guid><description>Zihan Lin et al. — arxiv:2605.00380 — Tool Use</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>AgentFloor: How Far Up the tool use Ladder Can Small Open-Weight Models Go?</title><link>http://arxiv.org/abs/2605.00334v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00334v1</guid><description>Ranit Karmakar et al. — arxiv:2605.00334 — Tool Use</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>On Aubry&apos;s completeness conjecture</title><link>http://arxiv.org/abs/2605.00305v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00305v1</guid><description>Tianqi Shi et al. — arxiv:2605.00305 — Tool Use</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>A Low-Latency Fraud Detection Layer for Detecting Adversarial Interaction Patterns in LLM-Powered Agents</title><link>http://arxiv.org/abs/2605.01143v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01143v1</guid><description>Sheldon Yu et al. — arxiv:2605.01143 — Tool Use</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>Persistent Visual Memory: Sustaining Perception for Deep Generation in LVLMs</title><link>http://arxiv.org/abs/2605.00814v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00814v1</guid><description>Siyuan Huang et al. — arxiv:2605.00814 — Multimodal LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Let ViT Speak: Generative Language-Image Pre-training</title><link>http://arxiv.org/abs/2605.00809v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00809v1</guid><description>Yan Fang et al. — arxiv:2605.00809 — Multimodal LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Generating Statistical Charts with Validation-Driven LLM Workflows</title><link>http://arxiv.org/abs/2605.00800v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00800v1</guid><description>Pavlin G. Poličar et al. — arxiv:2605.00800 — Multimodal LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Make Your LVLM KV Cache More Lightweight</title><link>http://arxiv.org/abs/2605.00789v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00789v1</guid><description>Xihao Chen et al. — arxiv:2605.00789 — Multimodal LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>STARE: Step-wise Temporal Alignment and Red-teaming Engine for Multi-modal Toxicity Attack</title><link>http://arxiv.org/abs/2605.00699v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00699v1</guid><description>Xutao Mao et al. — arxiv:2605.00699 — Multimodal LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Intrinsic Gradient Suppression for Label-Noise Prompt Tuning in Vision-Language Models</title><link>http://arxiv.org/abs/2605.00591v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00591v1</guid><description>Jiayu Li et al. — arxiv:2605.00591 — Multimodal LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Jailbreaking Vision-Language Models Through the Visual Modality</title><link>http://arxiv.org/abs/2605.00583v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00583v1</guid><description>Aharon Azulay et al. — arxiv:2605.00583 — Multimodal LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Leveraging Vision-Language Models as Weak Annotators in Active Learning</title><link>http://arxiv.org/abs/2605.00480v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00480v1</guid><description>Phuong Ngoc Nguyen et al. — arxiv:2605.00480 — Multimodal LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Scaling Video Understanding via Compact Latent Multi-Agent Collaboration</title><link>http://arxiv.org/abs/2605.00444v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00444v1</guid><description>Kerui Chen et al. — arxiv:2605.00444 — Multimodal LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Thinking in Text and Images: Interleaved Vision--Language Reasoning Traces for Long-Horizon Robot Manipulation</title><link>http://arxiv.org/abs/2605.00438v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00438v1</guid><description>Jinkun Liu et al. — arxiv:2605.00438 — Multimodal LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multimodal LLM</category></item><item><title>Learning How and What to Memorize: Cognition-Inspired Two-Stage Optimization for Evolving Memory</title><link>http://arxiv.org/abs/2605.00702v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00702v1</guid><description>Derong Xu et al. — arxiv:2605.00702 — Long Context</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Scaling Video Understanding via Compact Latent Multi-Agent Collaboration</title><link>http://arxiv.org/abs/2605.00444v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00444v1</guid><description>Kerui Chen et al. — arxiv:2605.00444 — Long Context</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>MemRouter: Memory-as-Embedding Routing for Long-Term Conversational Agents</title><link>http://arxiv.org/abs/2605.00356v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00356v1</guid><description>Tianyu Hu et al. — arxiv:2605.00356 — Long Context</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Budget-Aware Routing for Long Clinical Text</title><link>http://arxiv.org/abs/2605.00336v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00336v1</guid><description>Khizar Qureshi et al. — arxiv:2605.00336 — Long Context</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>The structure of gauge invariant Gaussian quantum operations on finite Fermion systems</title><link>http://arxiv.org/abs/2605.00784v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00784v1</guid><description>Eric A. Carlen et al. — arxiv:2605.00784 — LLM Efficiency</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>UniVidX: A Unified Multimodal Framework for Versatile Video Generation via Diffusion Priors</title><link>http://arxiv.org/abs/2605.00658v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00658v1</guid><description>Houyuan Chen et al. — arxiv:2605.00658 — LLM Efficiency</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Budget Constraints as Riemannian Manifolds</title><link>http://arxiv.org/abs/2605.00649v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00649v1</guid><description>Michael Helcig et al. — arxiv:2605.00649 — LLM Efficiency</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Faithful Extreme Image Rescaling with Learnable Reversible Transformation and Semantic Priors</title><link>http://arxiv.org/abs/2605.00605v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00605v1</guid><description>Hao Wei et al. — arxiv:2605.00605 — LLM Efficiency</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Fast and Exact: Asymptotically Linear KL-Optimal Frequency Normalization</title><link>http://arxiv.org/abs/2605.00579v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00579v1</guid><description>Kamila Szewczyk et al. — arxiv:2605.00579 — LLM Efficiency</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Quantum corrections to the Josephson dynamics: a population-imbalance approach</title><link>http://arxiv.org/abs/2605.00571v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00571v1</guid><description>Oliver Hideg et al. — arxiv:2605.00571 — LLM Efficiency</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>AGoQ: Activation and Gradient Quantization for Memory-Efficient Distributed Training of LLMs</title><link>http://arxiv.org/abs/2605.00539v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00539v1</guid><description>Wenxiang Lin et al. — arxiv:2605.00539 — LLM Efficiency</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>Silicon Showdown: Performance, Efficiency, and Ecosystem Barriers in Consumer-Grade LLM Inference</title><link>http://arxiv.org/abs/2605.00519v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00519v1</guid><description>Allan Kazakov et al. — arxiv:2605.00519 — LLM Efficiency</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>BWLA: Breaking the Barrier of W1AX Post-Training Quantization for LLMs</title><link>http://arxiv.org/abs/2605.00422v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00422v1</guid><description>Zhixiong Zhao et al. — arxiv:2605.00422 — LLM Efficiency</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>RadLite: Multi-Task LoRA Fine-Tuning of Small Language Models for CPU-Deployable Radiology AI</title><link>http://arxiv.org/abs/2605.00421v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00421v1</guid><description>Pankaj Gupta et al. — arxiv:2605.00421 — LLM Efficiency</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Efficiency</category></item><item><title>DynamicPO: Dynamic Preference Optimization for Recommendation</title><link>http://arxiv.org/abs/2605.00327v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00327v1</guid><description>Xingyu Hu et al. — arxiv:2605.00327 — Alignment</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Iterative Finetuning is Mostly Idempotent</title><link>http://arxiv.org/abs/2605.01130v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01130v1</guid><description>Zephaniah Roe et al. — arxiv:2605.01130 — Alignment</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>PERSA: Reinforcement Learning for Professor-Style Personalized Feedback with LLMs</title><link>http://arxiv.org/abs/2605.01123v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01123v1</guid><description>Ravi Ranjan et al. — arxiv:2605.01123 — Alignment</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>When LLMs Stop Following Steps: A Diagnostic Study of Procedural Execution in Language Models</title><link>http://arxiv.org/abs/2605.00817v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00817v1</guid><description>Sailesh Panda et al. — arxiv:2605.00817 — Hallucination</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>A Geometric Interpretation of Generalized Hurwitz--Radon Numbers Defined by Kannaka--Tojo</title><link>http://arxiv.org/abs/2605.00704v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00704v1</guid><description>Muneto Miyaji et al. — arxiv:2605.00704 — Hallucination</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>From Prediction to Practice: A Task-Aware Evaluation Framework for Blood Glucose Forecasting</title><link>http://arxiv.org/abs/2605.00645v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00645v1</guid><description>Alireza Namazi et al. — arxiv:2605.00645 — Hallucination</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Class Angular Distortion Index for Dimensionality Reduction</title><link>http://arxiv.org/abs/2605.00637v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00637v1</guid><description>Kaviru Gunaratne et al. — arxiv:2605.00637 — Hallucination</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>H-RAG at SemEval-2026 Task 8: Hierarchical Parent-Child Retrieval for Multi-Turn RAG Conversations</title><link>http://arxiv.org/abs/2605.00631v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00631v1</guid><description>Passant Elchafei et al. — arxiv:2605.00631 — Hallucination</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>Faithful Extreme Image Rescaling with Learnable Reversible Transformation and Semantic Priors</title><link>http://arxiv.org/abs/2605.00605v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00605v1</guid><description>Hao Wei et al. — arxiv:2605.00605 — Hallucination</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>LLM-Oriented Information Retrieval: A Denoising-First Perspective</title><link>http://arxiv.org/abs/2605.00505v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00505v1</guid><description>Lu Dai et al. — arxiv:2605.00505 — Hallucination</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>From Local to Global to Mechanistic: An iERF-Centered Unified Framework for Interpreting Vision Models</title><link>http://arxiv.org/abs/2605.00474v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00474v1</guid><description>Yearim Kim et al. — arxiv:2605.00474 — Hallucination</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>ReLay: Personalized LLM-Generated Plain-Language Summaries for Better Understanding, but at What Cost?</title><link>http://arxiv.org/abs/2605.00468v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00468v1</guid><description>Joey Chan et al. — arxiv:2605.00468 — Hallucination</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>LIMSSR: LLM-Driven Sequence-to-Score Reasoning under Training-Time Incomplete Multimodal Observations</title><link>http://arxiv.org/abs/2605.00434v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00434v1</guid><description>Huangbiao Xu et al. — arxiv:2605.00434 — Hallucination</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Hallucination</category></item><item><title>FinSafetyBench: Evaluating LLM Safety in Real-World Financial Scenarios</title><link>http://arxiv.org/abs/2605.00706v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00706v1</guid><description>Yutao Hou et al. — arxiv:2605.00706 — LLM Safety</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>STARE: Step-wise Temporal Alignment and Red-teaming Engine for Multi-modal Toxicity Attack</title><link>http://arxiv.org/abs/2605.00699v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00699v1</guid><description>Xutao Mao et al. — arxiv:2605.00699 — LLM Safety</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Jailbreaking Vision-Language Models Through the Visual Modality</title><link>http://arxiv.org/abs/2605.00583v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00583v1</guid><description>Aharon Azulay et al. — arxiv:2605.00583 — LLM Safety</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Stable-GFlowNet: Toward Diverse and Robust LLM Red-Teaming via Contrastive Trajectory Balance</title><link>http://arxiv.org/abs/2605.00553v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00553v1</guid><description>Minchan Kwon et al. — arxiv:2605.00553 — LLM Safety</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Disciplined Diffusion: Text-to-Image Diffusion Model against NSFW Generation</title><link>http://arxiv.org/abs/2605.01113v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01113v1</guid><description>Chi Zhang et al. — arxiv:2605.01113 — LLM Safety</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>SRTJ: Self-Evolving Rule-Driven Training-Free LLM Jailbreaking</title><link>http://arxiv.org/abs/2605.00974v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00974v1</guid><description>Jindong Li et al. — arxiv:2605.00974 — LLM Safety</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Agent Capsules: Quality-Gated Granularity Control for Multi-Agent LLM Pipelines</title><link>http://arxiv.org/abs/2605.00410v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00410v1</guid><description>Aninda Ray et al. — arxiv:2605.00410 — LLM Evaluation</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>Negative Data Mining for Contrastive Learning in Dense Retrieval at IKEA.com</title><link>http://arxiv.org/abs/2605.00353v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00353v1</guid><description>Eva Agapaki et al. — arxiv:2605.00353 — LLM Evaluation</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>RunAgent: Interpreting Natural-Language Plans with Constraint-Guided Execution</title><link>http://arxiv.org/abs/2605.00798v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00798v1</guid><description>Arunabh Srivastava et al. — arxiv:2605.00798 — Code LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Themis: Training Robust Multilingual Code Reward Models for Flexible Multi-Criteria Scoring</title><link>http://arxiv.org/abs/2605.00754v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00754v1</guid><description>Indraneil Paul et al. — arxiv:2605.00754 — Code LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Improving LLM Code Generation via Requirement-Aware Curriculum Reinforcement Learning</title><link>http://arxiv.org/abs/2605.00433v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00433v1</guid><description>Shouyu Yin et al. — arxiv:2605.00433 — Code LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>Social Bias in LLM-Generated Code: Benchmark and Mitigation</title><link>http://arxiv.org/abs/2605.00382v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00382v1</guid><description>Fazle Rabbi et al. — arxiv:2605.00382 — Code LLM</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Code LLM</category></item><item><title>ML-Bench&amp;Guard: Policy-Grounded Multilingual Safety Benchmark and Guardrail for Large Language Models</title><link>http://arxiv.org/abs/2605.00689v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00689v1</guid><description>Yunhan Zhao et al. — arxiv:2605.00689 — Legal NLP</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Legal NLP</category></item><item><title>Teaching LLMs Brazilian Healthcare: Injecting Knowledge from Official Clinical Guidelines</title><link>http://arxiv.org/abs/2605.01077v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01077v1</guid><description>Hugo Abonizio et al. — arxiv:2605.01077 — Medical NLP</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Medical NLP</category></item><item><title>Themis: Training Robust Multilingual Code Reward Models for Flexible Multi-Criteria Scoring</title><link>http://arxiv.org/abs/2605.00754v2</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00754v2</guid><description>Indraneil Paul et al. — arxiv:2605.00754 — Multilingual NLP</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>SC-Taxo: Hierarchical Taxonomy Generation under Semantic Consistency Constraints using Large Language Models</title><link>http://arxiv.org/abs/2605.00620v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00620v1</guid><description>Shiqiang Cai et al. — arxiv:2605.00620 — Multilingual NLP</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Multilingual NLP</category></item><item><title>MMAudio-LABEL: Audio Event Labeling via Audio Generation for Silent Video</title><link>http://arxiv.org/abs/2605.00495v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00495v1</guid><description>Kazuya Tateishi et al. — arxiv:2605.00495 — Text Classification</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>A Sentence Relation-Based Approach to Sanitizing Malicious Instructions</title><link>http://arxiv.org/abs/2605.01078v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.01078v1</guid><description>Soumil Datta et al. — arxiv:2605.01078 — Text Classification</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>Generating Statistical Charts with Validation-Driven LLM Workflows</title><link>http://arxiv.org/abs/2605.00800v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00800v1</guid><description>Pavlin G. Poličar et al. — arxiv:2605.00800 — Question Answering</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Hierarchical Abstract Tree for Cross-Document Retrieval-Augmented Generation</title><link>http://arxiv.org/abs/2605.00529v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00529v1</guid><description>Ziwen Zhao et al. — arxiv:2605.00529 — Question Answering</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>The Power of Order: Fooling LLMs with Adversarial Table Permutations</title><link>http://arxiv.org/abs/2605.00445v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00445v1</guid><description>Xinshuai Dong et al. — arxiv:2605.00445 — Question Answering</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>MemRouter: Memory-as-Embedding Routing for Long-Term Conversational Agents</title><link>http://arxiv.org/abs/2605.00356v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00356v1</guid><description>Tianyu Hu et al. — arxiv:2605.00356 — Question Answering</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item><item><title>Directed Social Regard: Surfacing Targeted Advocacy, Opposition, Aid, Harms, and Victimization in Online Media</title><link>http://arxiv.org/abs/2605.00776v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00776v1</guid><description>Scott Friedman et al. — arxiv:2605.00776 — Sentiment Analysis</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Sentiment Analysis</category></item><item><title>ARIS: Agentic and Relationship Intelligence System for Social Robots</title><link>http://arxiv.org/abs/2605.00943v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00943v1</guid><description>Stavya Datta et al. — arxiv:2605.00943 — Knowledge Graph</description><pubDate>Fri, 01 May 2026 00:00:00 GMT</pubDate><category>Knowledge Graph</category></item><item><title>NLPOpt-Net: A Learning Method for Nonlinear Optimization with Feasibility Guarantees</title><link>http://arxiv.org/abs/2605.00260v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00260v1</guid><description>Bimol Nath Roy et al. — arxiv:2605.00260 — NLP</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Estimating LLM Grading Ability and Response Difficulty in Automatic Short Answer Grading via Item Response Theory</title><link>http://arxiv.org/abs/2605.00238v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00238v1</guid><description>Longwei Cong et al. — arxiv:2605.00238 — NLP</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>NorBERTo: A ModernBERT Model Trained for Portuguese with 331 Billion Tokens Corpus</title><link>http://arxiv.org/abs/2605.00086v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00086v1</guid><description>Enzo S. N. Silva et al. — arxiv:2605.00086 — NLP</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>NLP</category></item><item><title>Retrieval-Augmented Reasoning for Chartered Accountancy</title><link>http://arxiv.org/abs/2605.00257v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00257v1</guid><description>Jatin Gupta et al. — arxiv:2605.00257 — RAG</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>RAG</category></item><item><title>Retrieval-Augmented Reasoning for Chartered Accountancy</title><link>http://arxiv.org/abs/2605.00257v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00257v1</guid><description>Jatin Gupta et al. — arxiv:2605.00257 — Reasoning</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>TUR-DPO: Topology- and Uncertainty-Aware Direct Preference Optimization</title><link>http://arxiv.org/abs/2605.00224v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00224v1</guid><description>Abdulhady Abas Abdullah et al. — arxiv:2605.00224 — Reasoning</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Reasoning</category></item><item><title>Are Tools All We Need? Unveiling the Tool-Use Tax in LLM Agents</title><link>http://arxiv.org/abs/2605.00136v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00136v1</guid><description>Kaituo Zhang et al. — arxiv:2605.00136 — Tool Use</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>TADI: Tool-Augmented Drilling Intelligence via Agentic LLM Orchestration over Heterogeneous Wellsite Data</title><link>http://arxiv.org/abs/2605.00060v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00060v1</guid><description>Rong Lu et al. — arxiv:2605.00060 — Tool Use</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Tool Use</category></item><item><title>TUR-DPO: Topology- and Uncertainty-Aware Direct Preference Optimization</title><link>http://arxiv.org/abs/2605.00224v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00224v1</guid><description>Abdulhady Abas Abdullah et al. — arxiv:2605.00224 — Long Context</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>NorBERTo: A ModernBERT Model Trained for Portuguese with 331 Billion Tokens Corpus</title><link>http://arxiv.org/abs/2605.00086v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00086v1</guid><description>Enzo S. N. Silva et al. — arxiv:2605.00086 — Long Context</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Long Context</category></item><item><title>Attention Is Where You Attack</title><link>http://arxiv.org/abs/2605.00236v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00236v1</guid><description>Aviral Srivastava et al. — arxiv:2605.00236 — Alignment</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>TUR-DPO: Topology- and Uncertainty-Aware Direct Preference Optimization</title><link>http://arxiv.org/abs/2605.00224v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00224v1</guid><description>Abdulhady Abas Abdullah et al. — arxiv:2605.00224 — Alignment</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>Wasserstein Distributionally Robust Regret Optimization for Reinforcement Learning from Human Feedback</title><link>http://arxiv.org/abs/2605.00155v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00155v1</guid><description>Yikai Wang et al. — arxiv:2605.00155 — Alignment</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Alignment</category></item><item><title>How Language Models Process Out-of-Distribution Inputs: A Two-Pathway Framework</title><link>http://arxiv.org/abs/2605.00269v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00269v1</guid><description>Hamidreza Saghir et al. — arxiv:2605.00269 — LLM Safety</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Jailbroken Frontier Models Retain Their Capabilities</title><link>http://arxiv.org/abs/2605.00267v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00267v1</guid><description>Daniel Zhu et al. — arxiv:2605.00267 — LLM Safety</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Attention Is Where You Attack</title><link>http://arxiv.org/abs/2605.00236v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00236v1</guid><description>Aviral Srivastava et al. — arxiv:2605.00236 — LLM Safety</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>Minimal, Local, Causal Explanations for Jailbreak Success in Large Language Models</title><link>http://arxiv.org/abs/2605.00123v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00123v1</guid><description>Shubham Kumar et al. — arxiv:2605.00123 — LLM Safety</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>LLM Safety</category></item><item><title>ARMOR 2025: A Military-Aligned Benchmark for Evaluating Large Language Model Safety Beyond Civilian Contexts</title><link>http://arxiv.org/abs/2605.00245v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00245v1</guid><description>Sydney Johns et al. — arxiv:2605.00245 — LLM Evaluation</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>TUR-DPO: Topology- and Uncertainty-Aware Direct Preference Optimization</title><link>http://arxiv.org/abs/2605.00224v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00224v1</guid><description>Abdulhady Abas Abdullah et al. — arxiv:2605.00224 — LLM Evaluation</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>How Frontier LLMs Adapt to Neurodivergence Context: A Measurement Framework for Surface vs. Structural Change in System-Prompted Responses</title><link>http://arxiv.org/abs/2605.00113v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00113v1</guid><description>Ishan Gupta et al. — arxiv:2605.00113 — LLM Evaluation</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>CRC-Screen: Certified DNA-Synthesis Hazard Screening Under Taxonomic Shift</title><link>http://arxiv.org/abs/2605.00074v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00074v1</guid><description>Najmul Hasan et al. — arxiv:2605.00074 — LLM Evaluation</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>LLM Evaluation</category></item><item><title>ViLegalNLI: Natural Language Inference for Vietnamese Legal Texts</title><link>http://arxiv.org/abs/2605.00116v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00116v1</guid><description>Nhung Thi-Hong Duong et al. — arxiv:2605.00116 — Legal NLP</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Legal NLP</category></item><item><title>Sequential Measurements as a Resource for Quantum Metrology</title><link>http://arxiv.org/abs/2605.00287v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00287v1</guid><description>Koray Mentesoglu et al. — arxiv:2605.00287 — Information Extraction</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Information Extraction</category></item><item><title>ViLegalNLI: Natural Language Inference for Vietnamese Legal Texts</title><link>http://arxiv.org/abs/2605.00116v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00116v1</guid><description>Nhung Thi-Hong Duong et al. — arxiv:2605.00116 — Text Classification</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Text Classification</category></item><item><title>TUR-DPO: Topology- and Uncertainty-Aware Direct Preference Optimization</title><link>http://arxiv.org/abs/2605.00224v1</link><guid isPermaLink="true">http://arxiv.org/abs/2605.00224v1</guid><description>Abdulhady Abas Abdullah et al. — arxiv:2605.00224 — Question Answering</description><pubDate>Thu, 30 Apr 2026 00:00:00 GMT</pubDate><category>Question Answering</category></item></channel></rss>