% ACL 2026 main-conference paper; the eprint field carries the arXiv identifier.
@inproceedings{zou2026informativeness,
  author        = {Zou, X. and Sridhar, R. and Safarzadeh, M. and Roth, D.},
  title         = {When Vision-Language Models Judge Without Seeing: Exposing Informativeness Bias},
  booktitle     = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  year          = {2026},
  note          = {Main Conference},
  eprint        = {2604.17768},
  archiveprefix = {arXiv},
}
% ACL 2026 main-conference paper; the eprint field carries the arXiv identifier.
% SPENCE and NL2SQL are brace-protected so sentence-casing styles keep them uppercase.
@inproceedings{safarzadeh2026spence,
  author        = {Safarzadeh, M. and Patel, H. Laxmichand and Oroojlooy, A. and Horwood, G. and Roth, D.},
  title         = {{SPENCE}: A Syntactic Probe for Detecting Contamination in {NL2SQL} Benchmarks},
  booktitle     = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  year          = {2026},
  note          = {Main Conference},
  eprint        = {2604.17771},
  archiveprefix = {arXiv},
}
2025
EMNLP
Evaluating NL2SQL via SQL2NL
Mohammadtaher Safarzadeh, Afshin Oroojlooyjadid, and Dan Roth
We propose a schema-aligned paraphrasing framework that leverages SQL-to-NL (SQL2NL) to automatically generate semantically equivalent, lexically diverse queries for robust evaluation of NL2SQL models. Our analysis reveals that state-of-the-art models are far more brittle than standard benchmarks suggest.
% Findings of EMNLP 2025 paper; the url field points at its ACL Anthology record.
% booktitle uses the full proceedings title rather than the informal short form,
% with the EMNLP acronym brace-protected against style recasing.
@inproceedings{safarzadeh2025nl2sql,
  author    = {Safarzadeh, Mohammadtaher and Oroojlooyjadid, Afshin and Roth, Dan},
  title     = {Evaluating {NL2SQL} via {SQL2NL}},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2025},
  year      = {2025},
  url       = {https://aclanthology.org/2025.findings-emnlp.1031},
}