Interface administrators, Administrators (Semantic MediaWiki), Curators (Semantic MediaWiki), Editors (Semantic MediaWiki), Suppressors, Administrators
7,785
edits
No edit summary |
No edit summary |
||
Line 193: | Line 193: | ||
| 1-shot PaLM | | 1-shot PaLM | ||
| 88.4 | | 88.4 | ||
|} | |||
==Benchmarks (Visual)== | |||
{| class="wikitable" | |||
! Benchmark | |||
! GPT-4 | |||
! Evaluated few-shot | |||
! Few-shot SOTA | |||
! SOTA | |||
! Best external model (includes benchmark-specific training) | |||
|- | |||
| VQAv2 | |||
| 77.2% | |||
| 0-shot | |||
| 67.6% | |||
| Flamingo 32-shot | |||
| 84.3% | |||
| PaLI-17B | |||
|- | |||
| TextVQA | |||
| 78.0% | |||
| 0-shot | |||
| 37.9% | |||
| Flamingo 32-shot | |||
| 71.8% | |||
| PaLI-17B | |||
|- | |||
| ChartQA | |||
| 78.5%<sup>A</sup> | |||
| - | |||
| 58.6% | |||
| Pix2Struct Large | |||
| - | |||
|- | |||
| AI2 Diagram (AI2D) | |||
| 78.2% | |||
| 0-shot | |||
| - | |||
| 42.1% | |||
| Pix2Struct Large | |||
| - | |||
|- | |||
| DocVQA | |||
| 88.4% | |||
| 0-shot (pixel-only) | |||
| - | |||
| 88.4% | |||
| ERNIE-Layout 2.0 | |||
| - | |||
|- | |||
| Infographic VQA | |||
| 75.1% | |||
| 0-shot (pixel-only) | |||
| - | |||
| 61.2% | |||
| Applica.ai TILT | |||
| - | |||
|- | |||
| TVQA | |||
| 87.3% | |||
| 0-shot | |||
| - | |||
| 86.5% | |||
| MERLOT Reserve Large | |||
| - | |||
|- | |||
| LSMDC | |||
| 45.7% | |||
| 0-shot | |||
| 31.0% | |||
| MERLOT Reserve 0-shot | |||
| 52.9% | |||
| MERLOT | |||
|} | |} |