← Back to index
dandelin/vilt-b32-finetuned-vqa
Inference API (serverless) is disabled for an unknown reason. Please open a Discussion in the Community tab.
			{
  "_id": "621ffdc136468d709f17a32e",
  "id": "dandelin/vilt-b32-finetuned-vqa",
  "modelId": "dandelin/vilt-b32-finetuned-vqa",
  "author": "dandelin",
  "sha": "d0a1f6ab88522427a7ae76ceb6e1e1e7b68a1d08",
  "lastModified": "2022-08-02T13:03:04.000Z",
  "private": false,
  "disabled": false,
  "gated": false,
  "pipeline_tag": "visual-question-answering",
  "tags": [
    "transformers",
    "pytorch",
    "vilt",
    "visual-question-answering",
    "arxiv:2102.03334",
    "license:apache-2.0",
    "endpoints_compatible",
    "region:us"
  ],
  "downloads": 190336,
  "library_name": "transformers",
  "widgetData": [
    {
      "text": "What's the animal doing?",
      "src": "https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg"
    },
    {
      "text": "What is on top of the building?",
      "src": "https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg"
    }
  ],
  "likes": 391,
  "model-index": null,
  "config": {
    "architectures": [
      "ViltForVisualQuestionAnswering"
    ],
    "model_type": "vilt",
    "tokenizer_config": {
      "unk_token": "[UNK]",
      "sep_token": "[SEP]",
      "pad_token": "[PAD]",
      "cls_token": "[CLS]",
      "mask_token": "[MASK]"
    }
  },
  "cardData": {
    "tags": [
      "visual-question-answering"
    ],
    "license": "apache-2.0",
    "widget": [
      {
        "text": "What's the animal doing?",
        "src": "https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg"
      },
      {
        "text": "What is on top of the building?",
        "src": "https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg"
      }
    ]
  },
  "transformersInfo": {
    "auto_model": "ViltForVisualQuestionAnswering",
    "processor": "AutoProcessor"
  },
  "siblings": [
    {
      "rfilename": ".gitattributes"
    },
    {
      "rfilename": "README.md"
    },
    {
      "rfilename": "config.json"
    },
    {
      "rfilename": "preprocessor_config.json"
    },
    {
      "rfilename": "pytorch_model.bin"
    },
    {
      "rfilename": "special_tokens_map.json"
    },
    {
      "rfilename": "tokenizer.json"
    },
    {
      "rfilename": "tokenizer_config.json"
    },
    {
      "rfilename": "vocab.txt"
    }
  ],
  "spaces": [
    "microsoft/HuggingGPT",
    "Yntec/ToyWorld",
    "Yntec/HuggingfaceDiffusion",
    "Yntec/PrintingPress",
    "nielsr/comparing-VQA-models",
    "Nymbo/Compare-6",
    "llamameta/flux-pro-uncensored",
    "Yntec/ToyWorldXL",
    "nielsr/vilt-vqa",
    "llamameta/fluxproV2",
    "Yntec/blitz_diffusion",
    "phenixrhyder/NSFW-ToyWorld",
    "ethanchern/Anole",
    "John6666/Diffusion80XX4sg",
    "taesiri/HuggingGPT-Lite",
    "DemiPoto/TestDifs",
    "John6666/PrintingPress4",
    "DemiPoto/testSortModels",
    "Yntec/october-2024-image-models",
    "Yntec/MiniPrintingPress",
    "Yntec/StableDiffusion35Large-Image-Models-Test-November-2024",
    "Madhuri/vqa_audiobot",
    "ixxan/cross-lingual-vqa",
    "Nymbo/Diffusion80XX4sg",
    "John6666/hfd_test_nostopbutton",
    "huggingfacejs/doc-vis-qa",
    "John6666/ToyWorld4",
    "John6666/Diffusion80XX4g",
    "K00B404/HuggingfaceDiffusion_custom",
    "llamameta/fast-sd3.5-large",
    "Babyloncoder/chat-with-an-image",
    "John6666/Diffusion80XX4",
    "John6666/blitz_diffusion4",
    "OpenXAIProject/pnpxai-image-classification",
    "hayas-tohoku-workshop-2023/comparing-VQA-models",
    "dreamdrop-art/000555111",
    "SeyedAli/Persian-Visual-Question-Answering-1",
    "sathvikparasa20/vqa-vitgpt",
    "Blane187/multi-diffusion",
    "Yntec/MiniToyWorld",
    "Yntec/MiniHuggingfaceDiffusion",
    "ccarr0807/HuggingGPT",
    "theholycityweb/HuggingGPT",
    "gtx4010661/dandelin-vilt-b32-finetuned-vqa",
    "Alfasign/HuggingGPT-Lite",
    "roontoon/Demo-TTI-dandelin-vilt-b32-finetuned-vqa",
    "nilaymodi/dandelin-vilt-b32-finetuned-vqa",
    "saurshaz/HuggingGPT",
    "optowo/dandelin-vilt-b32-finetuned-vqa",
    "Nikhil0987/omm",
    "wendys-llc/comparing-VQA-models",
    "SilvusTV/Test",
    "roshithindia/imageQuestionAnswering",
    "mahitha11/vqa-vitgpt",
    "John6666/blitz_diffusion_builtin",
    "TotoB12/dandelin-vilt-b32-finetuned-vqa",
    "kaleidoskop-hug/PrintingPress",
    "chrisW6825/HuggingGPT",
    "Shenziqian/HuggingGPT",
    "hema1/Text-Visual-QA",
    "lokutus/HuggingGPT",
    "mimiqiao/HuggingGPT",
    "tsgbalakarthik/HuggingGPT",
    "wowochkin/HuggingGPT",
    "Msp/HuggingGPT",
    "apgarmd/jarvis",
    "apgarmd/jarvis2",
    "ryan12439/HuggingGPTpub",
    "turbowed/HuggingGPT",
    "Chokyounghoon/HuggingGPT",
    "mukulnag/HuggingGPT1",
    "FANCHIYU/HuggingGPT",
    "Betacuckgpt/HuggingGPT",
    "cashqin/HuggingGPT",
    "kanishka207004/project7",
    "lollo21/Will-GPT",
    "sakshamm/project7",
    "felixfriday/MICROSOFTT_JARVIS_HuggingGPT",
    "Meffordh/HuggingGPT",
    "Pfs2021Funny/HuggingGPT",
    "lugifudun/HuggingGPT",
    "irritablebro/HuggingGPT",
    "leadmaister/HuggingGPT",
    "pors/HuggingGPT",
    "keaneu/HuggingGPT",
    "MagKoz/HuggingGPT",
    "lzqfree/HuggingGPT",
    "zhangdream/HuggingGPT",
    "bountyfuljr/HuggingGPTplaypublic",
    "viscosity/HuggingGPT",
    "calliber/HuggingGPT",
    "Pitak/HuggingGPT",
    "Mcdof/HuggingGPT",
    "gaocegege/HuggingGPT",
    "BMukhtar/BMA",
    "mearjunsha/HuggingGPT",
    "vs4vijay/HuggingGPT",
    "mastere00/JarvisMeetsProfessor",
    "Anniek/VQA",
    "CollaalloC/HuggingGPT"
  ],
  "createdAt": "2022-03-02T23:29:05.000Z"
}