% Sapiro-Gheiler and Kastellec (2026): LLM-coded database of U.S. Courts of Appeals opinions.
% The citation key is left unchanged so existing \cite{189626} references keep resolving.
% NOTE(review): the entry carries no url/doi/howpublished/institution -- add one if known,
% and consider a more specific entry type (working paper / report) once the venue is confirmed.
@misc{189626,
  author   = {Sapiro-Gheiler, Eitan and Kastellec, Jonathan P.},
  title    = {Appealing to Large-Language-Model-as-Judge: A Comprehensive Machine-Coded Database for the {U.S.} {Courts} of {Appeals}},
  year     = {2026},
  month    = jun,
  keywords = {Courts of Appeals, large language models, judicial ideology, panel effects},
  abstract = {Research on the Courts of Appeals has been limited by the lack of a universe-level database covering the courts{\textquoteright} 440,000 published opinions from 1892 to 2025. Using a multiple-step large language model approach, we create such a database, including metadata (e.g., the judges involved in each case), summary information (e.g., substantive and procedural issues raised, litigant types), and the decision{\textquoteright}s ideological direction (liberal or conservative). We validate our database against multiple sources, showing that our LLM output matches human coders 85-90\% on key summary variables and 80\% of the time on ideological direction. The new database will enable much more comprehensive analyses of the Courts of Appeals over time; as examples, we extend existing work on panel effects and ideological decision-making. More generally, the approach we take provides a pipeline for converting expert-written codebooks into machine-extractable facts, which has relevance for computational social science beyond judicial politics.},
}