C# Image Caption

介绍

效果

模型

decoder_fc_nsc.onnx

encoder.onnx

项目

代码

下载

C# Image Caption

介绍

地址：https://github.com/ruotianluo/ImageCaptioning.pytorch

I decided to sync this repo with self-critical.pytorch. (The old master is kept in the "old master" branch for archival purposes.)

效果

模型

decoder_fc_nsc.onnx

Inputs
-------------------------
name：fc_feats
tensor：Float[1, 2048]
---------------------------------------------------------------

Outputs
-------------------------
name：seq
tensor：Int64[1, 20]
name：logprobs
tensor：Float[1, 20, 9488]
---------------------------------------------------------------

encoder.onnx

Inputs
-------------------------
name：img
tensor：Float[1, 3, 640, 640]
---------------------------------------------------------------

Outputs
-------------------------
name：fc
tensor：Float[2048]
---------------------------------------------------------------

项目

代码

using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using OpenCvSharp;
using OpenCvSharp.Dnn;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Windows.Forms;

namespace ImageCaption
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form. All control setup is delegated to
/// <c>InitializeComponent</c>, which lives in another part of this partial
/// class (presumably the designer-generated file).
/// </summary>
public Form1()
{
    this.InitializeComponent();
}

// Filter for the "open image" dialog. The second TIFF pattern was a duplicate
// "*.tiff"; it now covers the common short extension "*.tif" as well.
string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";

// Path of the image currently selected for captioning ("" = nothing selected).
string image_path = "";

// Application start-up directory; assigned in Form1_Load.
string startupPath;

// The next three fields are not read anywhere in the visible code.
string classer_path;
DateTime dt1 = DateTime.Now;
DateTime dt2 = DateTime.Now;

// Path of the decoder ONNX model; assigned in Form1_Load.
string model_path;

// Source image as loaded from image_path, and the copy with the caption drawn on it.
Mat image;
Mat result_image;

// ONNX Runtime state for the decoder model.
SessionOptions options;
InferenceSession onnx_session;
Tensor<float> input_tensor;
List<NamedOnnxValue> input_container;
IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result_infer;
DisposableNamedOnnxValue[] results_onnxvalue;

// First decoder output viewed as a tensor of word indices.
Tensor<Int64> result_tensors;

// OpenCV DNN network that runs the encoder model.
Net net;

// Length of the encoder feature vector and maximum caption length;
// both are assigned in Form1_Load.
int feat_len;
int D;

// Size the image is resized to before it is fed to the encoder.
int inpWidth = 640;
int inpHeight = 640;

// Per-channel normalisation constants used by Normalize.
// NOTE(review): these look like the usual ImageNet statistics in R,G,B order - confirm.
float[] mean = new float[] { 0.485f, 0.456f, 0.406f };
float[] std = new float[] { 0.229f, 0.224f, 0.225f };

// Vocabulary: word index (as a string) -> word. Filled in Form1_Load.
Dictionary<string, string> ix_to_word = new Dictionary<string, string>();

/// <summary>
/// Lets the user pick an image file, shows it in <c>pictureBox1</c> and loads
/// it into <c>image</c>. Any previous caption text and result picture are cleared.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // The dialog wraps native resources, so dispose it as soon as the
    // selected file name has been read.
    using (OpenFileDialog ofd = new OpenFileDialog())
    {
        ofd.Filter = fileFilter;
        if (ofd.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        pictureBox1.Image = null;
        image_path = ofd.FileName;
    }

    pictureBox1.Image = new Bitmap(image_path);
    textBox1.Text = "";

    // Load the new Mat first, then release the native memory of the old one.
    Mat previous = image;
    image = new Mat(image_path);
    if (previous != null)
    {
        previous.Dispose();
    }

    pictureBox2.Image = null;
}

/// <summary>
/// Captions the currently selected image: runs the encoder through OpenCV DNN,
/// feeds the resulting feature vector to the ONNX decoder, maps the returned
/// word indices through <c>ix_to_word</c>, and shows the caption both as text
/// and drawn onto a copy of the image.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <exception cref="InvalidOperationException">
/// The encoder returned fewer than <c>feat_len</c> values.
/// </exception>
private unsafe void button2_Click(object sender, EventArgs e)
{
    if (image_path == "")
    {
        return;
    }

    button2.Enabled = false;
    pictureBox2.Image = null;
    textBox1.Text = "";
    Application.DoEvents();

    try
    {
        // Reload the image from disk; release the previous Mat afterwards.
        Mat previous_image = image;
        image = new Mat(image_path);
        if (previous_image != null)
        {
            previous_image.Dispose();
        }

        // Resize + normalise, then run the encoder. Every temporary Mat owns
        // native memory, so each one is disposed deterministically.
        using (Mat temp_image = new Mat())
        {
            Cv2.Resize(image, temp_image, new OpenCvSharp.Size(inpWidth, inpHeight));
            Normalize(temp_image);

            using (Mat blob = CvDnn.BlobFromImage(temp_image))
            {
                net.SetInput(blob);

                using (Mat result_mat = net.Forward())
                {
                    // Guard the raw pointer copy below against a short output.
                    if (result_mat.Total() < feat_len)
                    {
                        throw new InvalidOperationException(
                            "Encoder output has fewer elements than expected.");
                    }

                    float* ptr_feat = (float*)result_mat.Data;
                    for (int i = 0; i < feat_len; i++)
                    {
                        input_tensor[0, i] = ptr_feat[i];
                    }
                }
            }
        }

        // The container is a long-lived field: without clearing it, every click
        // would append another "fc_feats" entry and the session would receive
        // duplicate inputs from the second run onwards.
        input_container.Clear();
        input_container.Add(NamedOnnxValue.CreateFromTensor("fc_feats", input_tensor));

        // Run the decoder and copy the first output into a managed array
        // before the native result collection is released.
        Int64[] result_array;
        result_infer = onnx_session.Run(input_container);
        try
        {
            results_onnxvalue = result_infer.ToArray();
            result_tensors = results_onnxvalue[0].AsTensor<Int64>();
            result_array = result_tensors.ToArray();
        }
        finally
        {
            result_infer.Dispose();
            // Do not keep references to disposed native objects around.
            result_infer = null;
            results_onnxvalue = null;
            result_tensors = null;
        }

        // Translate indices to words; the first index <= 0 ends the caption.
        string words = "";
        for (int k = 0; k < D && k < result_array.Length; k++)
        {
            if (result_array[k] <= 0)
            {
                break;
            }

            if (words.Length > 0)
            {
                words += " ";
            }
            words += ix_to_word[result_array[k].ToString()];
        }

        // Draw the caption onto a fresh copy of the source image.
        Mat previous_result = result_image;
        result_image = image.Clone();
        if (previous_result != null)
        {
            previous_result.Dispose();
        }

        Cv2.PutText(result_image, words
            , new OpenCvSharp.Point(10, 60)
            , HersheyFonts.HersheySimplex
            , 1
            , new Scalar(0, 0, 255)
            , 2
            );

        pictureBox2.Image = new Bitmap(result_image.ToMemoryStream());
        textBox1.Text = words;
    }
    finally
    {
        // Re-enable the button even when inference throws.
        button2.Enabled = true;
    }
}

/// <summary>
/// Normalises <paramref name="src"/> in place: converts it to 32-bit float
/// while multiplying by 1/255, then applies <c>(x - mean[i]) / std[i]</c> to
/// channel <c>i</c>.
/// </summary>
/// <param name="src">Image to normalise; it is overwritten with the result.</param>
public void Normalize(Mat src)
{
    src.ConvertTo(src, MatType.CV_32FC3, 1.0 / 255);

    // NOTE(review): the channels are processed in the Mat's own order (OpenCV
    // normally loads images as B,G,R) while mean/std look like R,G,B values -
    // confirm this ordering is intended.
    Mat[] planes = src.Split();
    int index = 0;
    foreach (Mat plane in planes)
    {
        double scale = 1 / std[index];
        double shift = (0.0 - mean[index]) / std[index];
        plane.ConvertTo(plane, MatType.CV_32FC1, scale, shift);
        index++;
    }

    Cv2.Merge(planes, src);

    // The split planes are temporaries; release them once they are merged back.
    for (int c = 0; c < planes.Length; c++)
    {
        planes[c].Dispose();
    }
}

/// <summary>
/// One-time initialisation on form load: creates the ONNX Runtime session for
/// the decoder, loads the encoder into an OpenCV DNN network, reads the
/// vocabulary file and shows the bundled sample image if it is present.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form1_Load(object sender, EventArgs e)
{
    startupPath = System.Windows.Forms.Application.StartupPath;

    model_path = "model/decoder_fc_nsc.onnx";

    // Session options: INFO-level logging, CPU execution provider.
    options = new SessionOptions();
    options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
    options.AppendExecutionProvider_CPU(0);

    // Inference session for the decoder model.
    onnx_session = new InferenceSession(model_path, options);

    feat_len = 2048;
    D = 20;

    // Decoder input tensor (1 x feat_len) and the reusable input container.
    input_tensor = new DenseTensor<float>(new[] { 1, feat_len });
    input_container = new List<NamedOnnxValue>();

    // Encoder network, read through OpenCV DNN.
    net = CvDnn.ReadNetFromOnnx("model/encoder.onnx");

    // Vocabulary file: one "index:word" entry per line.
    using (StreamReader sr = new StreamReader("model/vocab.txt"))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            // Split on the first ':' only, so a word that itself contains ':'
            // stays intact; skip blank or malformed lines instead of crashing.
            int sep = line.IndexOf(':');
            if (sep < 0)
            {
                continue;
            }

            // Indexer assignment tolerates a repeated index (last entry wins).
            ix_to_word[line.Substring(0, sep)] = line.Substring(sep + 1);
        }
    }

    // Preload the sample image when it ships with the application; otherwise
    // leave image_path unchanged so the user has to pick an image first.
    string sample_path = "test_img/1.jpg";
    if (File.Exists(sample_path))
    {
        image_path = sample_path;
        pictureBox1.Image = new Bitmap(image_path);
        image = new Mat(image_path);
    }
}

/// <summary>
/// Passes the picture currently shown in <c>pictureBox1</c> to
/// <c>Common.ShowNormalImg</c> (defined outside this file).
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void pictureBox1_DoubleClick(object sender, EventArgs e)
{
    var source_picture = pictureBox1.Image;
    Common.ShowNormalImg(source_picture);
}

/// <summary>
/// Passes the picture currently shown in <c>pictureBox2</c> to
/// <c>Common.ShowNormalImg</c> (defined outside this file).
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void pictureBox2_DoubleClick(object sender, EventArgs e)
{
    var result_picture = pictureBox2.Image;
    Common.ShowNormalImg(result_picture);
}

// Save dialog reused across clicks.
SaveFileDialog sdf = new SaveFileDialog();

/// <summary>
/// Saves the captioned picture shown in <c>pictureBox2</c> to a file chosen by
/// the user, in the format selected in the dialog's filter list.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    if (pictureBox2.Image == null)
    {
        return;
    }

    sdf.Title = "保存";
    sdf.Filter = "Images (*.jpg)|*.jpg|Images (*.png)|*.png|Images (*.bmp)|*.bmp|Images (*.emf)|*.emf|Images (*.exif)|*.exif|Images (*.gif)|*.gif|Images (*.ico)|*.ico|Images (*.tiff)|*.tiff|Images (*.wmf)|*.wmf";
    if (sdf.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    // Formats in the same order as the filter entries (FilterIndex is 1-based).
    // NOTE(review): GDI+ reportedly has no encoder for Emf/Wmf/Icon and falls
    // back to PNG data for them - confirm before relying on those entries.
    ImageFormat[] formats =
    {
        ImageFormat.Jpeg,
        ImageFormat.Png,
        ImageFormat.Bmp,
        ImageFormat.Emf,
        ImageFormat.Exif,
        ImageFormat.Gif,
        ImageFormat.Icon,
        ImageFormat.Tiff,
        ImageFormat.Wmf
    };

    int index = sdf.FilterIndex - 1;
    if (index < 0 || index >= formats.Length)
    {
        // Nothing would be written, so do not report success.
        return;
    }

    // The copy is only needed for the save; dispose it to release its GDI+ handle.
    using (Bitmap output = new Bitmap(pictureBox2.Image))
    {
        output.Save(sdf.FileName, formats[index]);
    }

    MessageBox.Show("保存成功，位置：" + sdf.FileName);
}
}
}

using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using OpenCvSharp;
using OpenCvSharp.Dnn;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Windows.Forms;

namespace ImageCaption
{
    /// <summary>
    /// Main window of the image-captioning demo: an encoder model (run through
    /// OpenCV DNN) produces a feature vector, an ONNX Runtime decoder turns it
    /// into word indices, and the indices are mapped to words via a vocabulary file.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Creates the form and runs <c>InitializeComponent</c>.</summary>
        public Form1()
        {
            InitializeComponent();
        }

        // Filter for the "open image" dialog ("*.tiff" was listed twice; "*.tif" added).
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";

        // Path of the image currently selected for captioning ("" = none).
        string image_path = "";
        string startupPath;

        // Not read anywhere in the visible code.
        string classer_path;
        DateTime dt1 = DateTime.Now;
        DateTime dt2 = DateTime.Now;

        string model_path;
        Mat image;
        Mat result_image;

        // ONNX Runtime state for the decoder model.
        SessionOptions options;
        InferenceSession onnx_session;
        Tensor<float> input_tensor;
        List<NamedOnnxValue> input_container;
        IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result_infer;
        DisposableNamedOnnxValue[] results_onnxvalue;
        Tensor<Int64> result_tensors;

        // OpenCV DNN network that runs the encoder model.
        Net net;

        // Feature-vector length and maximum caption length; set in Form1_Load.
        int feat_len;
        int D;

        // Encoder input size and per-channel normalisation constants.
        int inpWidth = 640;
        int inpHeight = 640;
        float[] mean = new float[] { 0.485f, 0.456f, 0.406f };
        float[] std = new float[] { 0.229f, 0.224f, 0.225f };

        // Vocabulary: word index (as a string) -> word.
        Dictionary<string, string> ix_to_word = new Dictionary<string, string>();

        /// <summary>
        /// Lets the user pick an image, shows it in <c>pictureBox1</c> and loads
        /// it into <c>image</c>; clears the previous caption and result picture.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                pictureBox1.Image = null;
                image_path = ofd.FileName;
            }

            pictureBox1.Image = new Bitmap(image_path);
            textBox1.Text = "";

            // Load the new Mat first, then release the old one's native memory.
            Mat previous = image;
            image = new Mat(image_path);
            if (previous != null)
            {
                previous.Dispose();
            }

            pictureBox2.Image = null;
        }

        /// <summary>
        /// Captions the selected image and shows the caption as text and drawn
        /// onto a copy of the image.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The encoder returned fewer than <c>feat_len</c> values.
        /// </exception>
        private unsafe void button2_Click(object sender, EventArgs e)
        {
            if (image_path == "")
            {
                return;
            }

            button2.Enabled = false;
            pictureBox2.Image = null;
            textBox1.Text = "";
            Application.DoEvents();

            try
            {
                // Reload the image from disk; release the previous Mat afterwards.
                Mat previous_image = image;
                image = new Mat(image_path);
                if (previous_image != null)
                {
                    previous_image.Dispose();
                }

                // Resize + normalise, then run the encoder; temporaries are disposed.
                using (Mat temp_image = new Mat())
                {
                    Cv2.Resize(image, temp_image, new OpenCvSharp.Size(inpWidth, inpHeight));
                    Normalize(temp_image);

                    using (Mat blob = CvDnn.BlobFromImage(temp_image))
                    {
                        net.SetInput(blob);

                        using (Mat result_mat = net.Forward())
                        {
                            // Guard the raw pointer copy against a short output.
                            if (result_mat.Total() < feat_len)
                            {
                                throw new InvalidOperationException(
                                    "Encoder output has fewer elements than expected.");
                            }

                            float* ptr_feat = (float*)result_mat.Data;
                            for (int i = 0; i < feat_len; i++)
                            {
                                input_tensor[0, i] = ptr_feat[i];
                            }
                        }
                    }
                }

                // Clear first: the container is a field, and appending on every
                // click would hand the session duplicate "fc_feats" inputs.
                input_container.Clear();
                input_container.Add(NamedOnnxValue.CreateFromTensor("fc_feats", input_tensor));

                // Run the decoder and copy the first output to a managed array
                // before releasing the native result collection.
                Int64[] result_array;
                result_infer = onnx_session.Run(input_container);
                try
                {
                    results_onnxvalue = result_infer.ToArray();
                    result_tensors = results_onnxvalue[0].AsTensor<Int64>();
                    result_array = result_tensors.ToArray();
                }
                finally
                {
                    result_infer.Dispose();
                    result_infer = null;
                    results_onnxvalue = null;
                    result_tensors = null;
                }

                // Translate indices to words; the first index <= 0 ends the caption.
                string words = "";
                for (int k = 0; k < D && k < result_array.Length; k++)
                {
                    if (result_array[k] <= 0)
                    {
                        break;
                    }

                    if (words.Length > 0)
                    {
                        words += " ";
                    }
                    words += ix_to_word[result_array[k].ToString()];
                }

                // Draw the caption onto a fresh copy of the source image.
                Mat previous_result = result_image;
                result_image = image.Clone();
                if (previous_result != null)
                {
                    previous_result.Dispose();
                }

                Cv2.PutText(result_image, words, new OpenCvSharp.Point(10, 60), HersheyFonts.HersheySimplex, 1, new Scalar(0, 0, 255), 2);

                pictureBox2.Image = new Bitmap(result_image.ToMemoryStream());
                textBox1.Text = words;
            }
            finally
            {
                // Re-enable the button even when inference throws.
                button2.Enabled = true;
            }
        }

        /// <summary>
        /// Normalises <paramref name="src"/> in place: converts to 32-bit float
        /// while multiplying by 1/255, then applies <c>(x - mean[i]) / std[i]</c>
        /// to channel <c>i</c>.
        /// </summary>
        /// <param name="src">Image to normalise; overwritten with the result.</param>
        public void Normalize(Mat src)
        {
            src.ConvertTo(src, MatType.CV_32FC3, 1.0 / 255);

            // NOTE(review): channels are processed in the Mat's own order while
            // mean/std look like R,G,B values - confirm the ordering is intended.
            Mat[] bgr = src.Split();
            for (int i = 0; i < bgr.Length; ++i)
            {
                bgr[i].ConvertTo(bgr[i], MatType.CV_32FC1, 1 / std[i], (0.0 - mean[i]) / std[i]);
            }

            Cv2.Merge(bgr, src);

            foreach (Mat channel in bgr)
            {
                channel.Dispose();
            }
        }

        /// <summary>
        /// One-time initialisation: creates the decoder session, loads the
        /// encoder network, reads the vocabulary and shows the sample image.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            startupPath = System.Windows.Forms.Application.StartupPath;

            model_path = "model/decoder_fc_nsc.onnx";

            // Session options: INFO-level logging, CPU execution provider.
            options = new SessionOptions();
            options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
            options.AppendExecutionProvider_CPU(0);

            // Inference session for the decoder model.
            onnx_session = new InferenceSession(model_path, options);

            feat_len = 2048;
            D = 20;

            // Decoder input tensor (1 x feat_len) and reusable input container.
            input_tensor = new DenseTensor<float>(new[] { 1, feat_len });
            input_container = new List<NamedOnnxValue>();

            // Encoder network, read through OpenCV DNN.
            net = CvDnn.ReadNetFromOnnx("model/encoder.onnx");

            // Vocabulary file: one "index:word" entry per line.
            using (StreamReader sr = new StreamReader("model/vocab.txt"))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Split on the first ':' only; skip blank or malformed lines.
                    int sep = line.IndexOf(':');
                    if (sep < 0)
                    {
                        continue;
                    }

                    ix_to_word[line.Substring(0, sep)] = line.Substring(sep + 1);
                }
            }

            // Preload the sample image only when it actually exists.
            string sample_path = "test_img/1.jpg";
            if (File.Exists(sample_path))
            {
                image_path = sample_path;
                pictureBox1.Image = new Bitmap(image_path);
                image = new Mat(image_path);
            }
        }

        /// <summary>Passes the source picture to <c>Common.ShowNormalImg</c>.</summary>
        private void pictureBox1_DoubleClick(object sender, EventArgs e)
        {
            Common.ShowNormalImg(pictureBox1.Image);
        }

        /// <summary>Passes the result picture to <c>Common.ShowNormalImg</c>.</summary>
        private void pictureBox2_DoubleClick(object sender, EventArgs e)
        {
            Common.ShowNormalImg(pictureBox2.Image);
        }

        // Save dialog reused across clicks.
        SaveFileDialog sdf = new SaveFileDialog();

        /// <summary>
        /// Saves the captioned picture to a user-chosen file in the format
        /// selected in the dialog's filter list.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            if (pictureBox2.Image == null)
            {
                return;
            }

            sdf.Title = "保存";
            sdf.Filter = "Images (*.jpg)|*.jpg|Images (*.png)|*.png|Images (*.bmp)|*.bmp|Images (*.emf)|*.emf|Images (*.exif)|*.exif|Images (*.gif)|*.gif|Images (*.ico)|*.ico|Images (*.tiff)|*.tiff|Images (*.wmf)|*.wmf";
            if (sdf.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            // Formats in the same order as the filter entries (FilterIndex is 1-based).
            ImageFormat[] formats =
            {
                ImageFormat.Jpeg,
                ImageFormat.Png,
                ImageFormat.Bmp,
                ImageFormat.Emf,
                ImageFormat.Exif,
                ImageFormat.Gif,
                ImageFormat.Icon,
                ImageFormat.Tiff,
                ImageFormat.Wmf
            };

            int index = sdf.FilterIndex - 1;
            if (index < 0 || index >= formats.Length)
            {
                // Nothing would be written, so do not report success.
                return;
            }

            // The copy is only needed for the save; dispose it afterwards.
            using (Bitmap output = new Bitmap(pictureBox2.Image))
            {
                output.Save(sdf.FileName, formats[index]);
            }

            MessageBox.Show("保存成功，位置：" + sdf.FileName);
        }
    }
}