% Journal article by Ye, Zhang and Li (2022), American Journal of Electrical
% and Electronic Engineering 10(1). Text outside an entry is ignored by BibTeX.
% The author list uses a single brace pair so the three names are parsed
% individually rather than as one corporate name.
@article{ajeee20221012,
  author    = {Ye, Kexin and Zhang, Lei and Li, Tianran},
  title     = {Design of a Picture-seeing and Talking System Based on Attention Mechanism},
  journal   = {American Journal of Electrical and Electronic Engineering},
  volume    = {10},
  number    = {1},
  pages     = {6--23},
  year      = {2022},
  publisher = {Science and Education Publishing},
  issn      = {2328-7357},
  doi       = {10.12691/ajeee-10-1-2},
  url       = {http://pubs.sciepub.com/ajeee/10/1/2},
  abstract  = {In order to solve this problem, this paper proposes an image title generation model based on deep loop architecture. This model combines some new achievements in computer vision and machine translation, and can generate natural sentences that accurately describe the image for an uncomplicated physical image. The model is trained to maximize the accuracy of the target description sentence in a given training image. The training data set was mainly completed by the MSCOCO data set, and in the later adjustment stage, some feature pictures I specifically looked for on the Internet were included for improvement. Test experiments on several data sets verify that the model has the ability of basic accurate image description. This model is usually more accurate in the case of uncomplicated physical pictures, which I have verified both qualitatively and quantitatively. The final result can input a qualified image and output a natural language sentence to describe the main content of the image.},
}
