% timmy-home/research/poka-yoke/references.bib

% AgentBench benchmark paper (arXiv preprint). Braces protect product names and
% acronyms from the sentence-casing that most bibliography styles apply to titles.
@article{liu2023agentbench,
  author        = {Liu, Xiao and Yu, Hao and Zhang, Hanchen and Xu, Yifan and Lei, Xuanyu and Lai, Hanyu and Gu, Yu and Ding, Hangliang and Men, Kaiwen and Yang, Kejuan and others},
  title         = {{AgentBench}: Evaluating {LLMs} as Agents},
  journal       = {arXiv preprint arXiv:2308.03688},
  year          = {2023},
  eprint        = {2308.03688},
  archiveprefix = {arXiv},
}

% SWE-bench-Live benchmark paper (arXiv preprint). The benchmark name is braced so
% sentence-casing styles do not turn it into "Swe-bench".
@article{zhang2025swebench,
  author        = {Zhang, Linghao and He, Shilin and Zhang, Chaoyun and Kang, Yu and Li, Bowen and Xie, Chengxing and Wang, Junhao and Wang, Maoquan and Huang, Yufan and Fu, Shengyu and others},
  title         = {{SWE-bench} Goes Live!},
  journal       = {arXiv preprint arXiv:2505.23419},
  year          = {2025},
  eprint        = {2505.23419},
  archiveprefix = {arXiv},
}

% SWE-Gym training-environment paper (arXiv preprint). The environment name is
% braced to survive title sentence-casing.
@article{pan2024swegym,
  author        = {Pan, Jiayi and Wang, Xingyao and Neubig, Graham and Jaitly, Navdeep and Ji, Heng and Suhr, Alane and Zhang, Yizhe},
  title         = {Training Software Engineering Agents and Verifiers with {SWE-Gym}},
  journal       = {arXiv preprint arXiv:2412.21139},
  year          = {2024},
  eprint        = {2412.21139},
  archiveprefix = {arXiv},
}

% SWE-Bench+ benchmark paper (arXiv preprint). Benchmark name and acronym are
% braced to survive title sentence-casing.
@article{aleithan2024swebenchplus,
  author        = {Aleithan, Reem and Xue, Haoran and Mohajer, Mohammad Mahdi and Nnorom, Elijah and Uddin, Gias and Wang, Song},
  title         = {{SWE-Bench+}: Enhanced Coding Benchmark for {LLMs}},
  journal       = {arXiv preprint arXiv:2410.06992},
  year          = {2024},
  eprint        = {2410.06992},
  archiveprefix = {arXiv},
}

% Guided-generation paper behind the Outlines library (arXiv preprint).
% The middle initial carries its period; the acronym is braced against sentence-casing.
@article{willard2023outlines,
  author        = {Willard, Brandon T. and Louf, R{\'e}mi},
  title         = {Efficient Guided Generation for {LLMs}},
  journal       = {arXiv preprint arXiv:2307.09702},
  year          = {2023},
  eprint        = {2307.09702},
  archiveprefix = {arXiv},
}

% Guidance structured-generation library, cited as software.
% NOTE(review): no arXiv identifier is known for this work, so it is recorded as a
% repository rather than a preprint. Confirm the title and URL before publication.
@misc{guidance2023,
  author       = {Lundberg, Scott and others},
  title        = {Guidance: Efficient Structured Generation for Language Models},
  howpublished = {GitHub repository},
  year         = {2023},
  url          = {https://github.com/guidance-ai/guidance},
}

% Instructor library, cited as software rather than as a journal article.
% NOTE(review): the repository has moved between owners; confirm the URL is current.
@misc{liu2024instructor,
  author       = {Liu, Jason},
  title        = {Instructor: Structured {LLM} Outputs with {Pydantic}},
  howpublished = {GitHub repository},
  year         = {2024},
  url          = {https://github.com/instructor-ai/instructor},
}

% Shingo's monograph on source inspection and poka-yoke (mistake-proofing).
@book{shingo1986zero,
  author    = {Shingo, Shigeo},
  title     = {Zero Quality Control: Source Inspection and the Poka-Yoke System},
  year      = {1986},
  publisher = {Productivity Press},
}

% NOTE(review): this reference could not be confirmed during review. Check that
% arXiv:1401.2519 resolves to this title and author before citing.
@article{nypi2014orthodox,
  author  = {Nypi, Jouni},
  title   = {Orthodox Fault Tolerance},
  year    = {2014},
  journal = {arXiv preprint arXiv:1401.2519},
}

% Adversarial-robustness (PGD training) paper. The venue is stored in full with
% the acronym braced, so any style can render or abbreviate it consistently.
@inproceedings{madry2018towards,
  author    = {Madry, Aleksander and Makelov, Aleksandar and Schmidt, Ludwig and Tsipras, Dimitris and Vladu, Adrian},
  title     = {Towards Deep Learning Models Resistant to Adversarial Attacks},
  booktitle = {International Conference on Learning Representations ({ICLR})},
  year      = {2018},
}

% NOTE(review): the AIBugHunter paper known to the reviewer is by Fu et al.
% (Empirical Software Engineering, 2024) and has a different title and author list.
% Verify this entry's title, authors and arXiv identifier before citing.
% Only title-case protection has been changed here; the bibliographic data is as found.
@article{li2023aibughunter,
  author  = {Li, Zhen and others},
  title   = {{AIBugHunter}: {AI}-Driven Bug Detection in Software},
  journal = {arXiv preprint arXiv:2305.04521},
  year    = {2023},
}

% NOTE(review): listed as an arXiv preprint but no arXiv identifier is recorded.
% Add eprint and archiveprefix fields once the identifier is known.
@article{yu2026benchmarking,
  author  = {Yu, Peijie and Liu, Wei and Yang, Yifan and Li, Jinjian and Zhang, Zelong and Feng, Xiao and Zhang, Feng},
  title   = {Benchmarking {LLM} Tool-Use in the Wild},
  journal = {arXiv preprint},
  year    = {2026},
}

% Survey of augmented (reasoning and tool-using) language models, arXiv preprint.
% Author list follows the arXiv record for 2302.07842.
@article{mialon2023augmented,
  author        = {Mialon, Gr{\'e}goire and Dess{\`\i}, Roberto and Lomeli, Maria and Nalmpantis, Christoforos and Pasunuru, Ram and Raileanu, Roberta and Rozi{\`e}re, Baptiste and Schick, Timo and Dwivedi-Yu, Jane and Celikyilmaz, Asli and Grave, Edouard and LeCun, Yann and Scialom, Thomas},
  title         = {Augmented Language Models: a Survey},
  journal       = {arXiv preprint arXiv:2302.07842},
  year          = {2023},
  eprint        = {2302.07842},
  archiveprefix = {arXiv},
}

% Toolformer, published in the NeurIPS 2023 proceedings (volume 36).
% The citation key keeps its original spelling so existing citations still resolve.
@inproceedings{schick2024toolformer,
  author    = {Schick, Timo and Dwivedi-Yu, Jane and Dess{\`\i}, Roberto and Raileanu, Roberta and Lomeli, Maria and Hambro, Eric and Zettlemoyer, Luke and Cancedda, Nicola and Scialom, Thomas},
  title     = {Toolformer: Language Models Can Teach Themselves to Use Tools},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {36},
  year      = {2023},
}

% NOTE(review): the title and arXiv:2112.09332 identify the WebGPT paper by Nakano et al. (2021),
% so authors and year are set to match that work. If the intended citation was
% "TALM: Tool Augmented Language Models" by Parisi, Zhao and Fiedel (arXiv:2205.12255, 2022),
% replace this entry instead. The citation key is unchanged so existing citations still resolve.
@article{parisi2022webgpt,
  author        = {Nakano, Reiichiro and Hilton, Jacob and Balaji, Suchir and Wu, Jeff and Ouyang, Long and Kim, Christina and Hesse, Christopher and Jain, Shantanu and Kosaraju, Vineet and Saunders, William and Jiang, Xu and Cobbe, Karl and Eloundou, Tyna and Krueger, Gretchen and Button, Kevin and Knight, Matthew and Chess, Benjamin and Schulman, John},
  title         = {{WebGPT}: Browser-Assisted Question-Answering with Human Feedback},
  journal       = {arXiv preprint arXiv:2112.09332},
  year          = {2021},
  eprint        = {2112.09332},
  archiveprefix = {arXiv},
}