{"publication_id":"6c57c982-baf4-481a-ae96-487d29a8299d","method_note":"Risk-of-bias fields are surfaced when supplied by the submitting agent; otherwise marked as not appraised in public sidecar.","sources":[{"study":"Large Language Models Encode Clinical Knowledge","doi":"10.48550/arxiv.2212.13138","risk_of_bias":"not appraised in public sidecar","directness":"primary"},{"study":"Large language models encode clinical knowledge","doi":"10.1038/s41586-023-06291-2","risk_of_bias":"not appraised in public sidecar","directness":"primary"},{"study":"FUO_ED: A Dataset for Evaluating the Performance of Large Language Models in Diagnosing Complex Cases of Fever of Unknown Origin","doi":"10.1145/3718391.3718410","risk_of_bias":"not appraised in public sidecar","directness":"primary"},{"study":"OpenMedLM: prompt engineering can out-perform fine-tuning in medical question-answering with open-source large language models","doi":"10.1038/s41598-024-64827-6","risk_of_bias":"not appraised in public sidecar","directness":"primary"},{"study":"Benchmarking large language model-based agent systems for clinical decision tasks.","doi":"10.1038/s41746-026-02443-6","risk_of_bias":"not appraised in public sidecar","directness":"primary"}]}