Microsoft.Baidu.Ali.語音識別/人臉識別


 

在第一篇博客里提過圖片識別的底層.最精准的圖片識別需要海量的數據磨煉.自己寫的底層沒有以億為單位的數據支持其實也是個殘廢品.

         此篇不是為了教學,而是為了在需要的時候抄下來就能用。

                    在此介紹Microsoft.Baidu.Ali的幾個人工智能接口吧.

                        沒啥技術含量.都是HTTP.POST請求一類的.

                                  個人可以申請30天免費試用.

                                        以下是微軟的人工智能接口(還嘗試了百度.阿里的人工智能.微軟識別的是最精准的)

/// <summary>
/// Posts a local image to the Microsoft Cognitive Services Face API "detect" endpoint
/// and writes the formatted JSON response to the console.
/// </summary>
public class FaceHelper
{
    private const string uriBase = "https://westcentralus.api.cognitive.microsoft.com/face/v1.0/detect";

    // Query string sent with every request: asks for the face id and the listed face attributes.
    private const string requestParameters = "returnFaceId=true&returnFaceLandmarks=false&returnFaceAttributes=age,gender,headPose,smile,facialHair,glasses,emotion,hair,makeup,occlusion,accessories,blur,exposure,noise";

    // One HttpClient is shared by all requests. The subscription key is attached to each
    // request message rather than to DefaultRequestHeaders, so instances created with
    // different keys cannot overwrite each other's key.
    private static readonly HttpClient client = new HttpClient();

    /// <summary>
    /// The request started by the constructor. Wait on or await this task to know when the
    /// response has been printed and to observe any failure. It is already completed when
    /// the constructor received a blank key and therefore sent nothing.
    /// </summary>
    public System.Threading.Tasks.Task Completion { get; private set; }

    /// <summary>
    /// Starts a face-detection request for the given image. The constructor returns
    /// immediately; the response is printed when the request finishes.
    /// </summary>
    /// <param name="Key">Subscription key sent in the Ocp-Apim-Subscription-Key header. Nothing is sent when it is null or blank.</param>
    /// <param name="imageFilePath">Path of the image file whose bytes are uploaded.</param>
    public FaceHelper(string Key, string imageFilePath)
    {
        if (String.IsNullOrWhiteSpace(Key))
        {
            Completion = System.Threading.Tasks.Task.FromResult(0);
            return;
        }

        Completion = MakeAnalysisRequest(Key, imageFilePath);
    }

    /// <summary>
    /// Uploads the image as application/octet-stream and prints the pretty-printed response body.
    /// Failures are reported on the error stream and then rethrown so they surface through the returned task.
    /// </summary>
    static async System.Threading.Tasks.Task MakeAnalysisRequest(string key, string imageFilePath)
    {
        try
        {
            string uri = uriBase + "?" + requestParameters;
            byte[] byteData = GetImageAsByteArray(imageFilePath);

            // Disposing the request message also disposes its content.
            using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Post, uri))
            {
                request.Headers.Add("Ocp-Apim-Subscription-Key", key);
                request.Content = new ByteArrayContent(byteData);
                request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");

                using (HttpResponseMessage response = await client.SendAsync(request).ConfigureAwait(false))
                {
                    string contentString = await response.Content.ReadAsStringAsync().ConfigureAwait(false);

                    Console.WriteLine("\nResponse:\n");
                    Console.WriteLine(JsonPrettyPrint(contentString));
                }
            }
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine("Face API request failed: " + ex.Message);
            throw;
        }
    }

    /// <summary>
    /// Reads the whole file into memory; the file handle is released before returning.
    /// </summary>
    static byte[] GetImageAsByteArray(string imageFilePath)
    {
        return File.ReadAllBytes(imageFilePath);
    }

    /// <summary>
    /// Re-indents a JSON string: three spaces per nesting level, one member or element per line.
    /// Text inside string values is copied verbatim, including escaped quotes and apostrophes.
    /// </summary>
    /// <param name="json">Raw JSON text; null or empty yields an empty string.</param>
    /// <returns>The formatted text.</returns>
    static string JsonPrettyPrint(string json)
    {
        if (string.IsNullOrEmpty(json))
            return string.Empty;

        json = json.Replace(Environment.NewLine, "").Replace("\t", "");

        StringBuilder sb = new StringBuilder();
        bool inString = false;   // currently between the quotes of a string value or key
        bool escaped = false;    // previous character inside the string was a backslash
        int offset = 0;          // current nesting depth
        const int indentLength = 3;

        foreach (char ch in json)
        {
            if (inString)
            {
                sb.Append(ch);
                if (escaped)
                    escaped = false;          // this character was escaped; it cannot end the string
                else if (ch == '\\')
                    escaped = true;
                else if (ch == '"')
                    inString = false;
                continue;
            }

            switch (ch)
            {
                case '"':
                    inString = true;
                    sb.Append(ch);
                    break;
                case '{':
                case '[':
                    sb.Append(ch);
                    sb.Append(Environment.NewLine);
                    sb.Append(new string(' ', ++offset * indentLength));
                    break;
                case '}':
                case ']':
                    // Never let the depth go negative on unbalanced input:
                    // a negative count would make the string constructor throw.
                    if (offset > 0) offset--;
                    sb.Append(Environment.NewLine);
                    sb.Append(new string(' ', offset * indentLength));
                    sb.Append(ch);
                    break;
                case ',':
                    sb.Append(ch);
                    sb.Append(Environment.NewLine);
                    sb.Append(new string(' ', offset * indentLength));
                    break;
                case ':':
                    sb.Append(ch);
                    sb.Append(' ');
                    break;
                default:
                    // Spaces outside strings are insignificant and are dropped.
                    if (ch != ' ') sb.Append(ch);
                    break;
            }
        }

        return sb.ToString().Trim();
    }

}

 

人臉識別 API:檢測、識別、分析、組織和標記照片中的人臉。

// Run face detection on the image whose path is stored under the "Face" app setting.
string faceImagePath = ConfigurationManager.AppSettings["Face"];
FaceHelper face = new FaceHelper("你的密鑰", faceImagePath);

 

 

 

返回值很多也很詳細:人臉在圖片的哪個區域、性別、有沒有頭發、有沒有胡子、有沒有眼鏡,都寫得很清楚,在此不一一列舉。

以下是語音識別。接口分 REST 和 SOCKET 兩種,識別語言也分中、英、美、法等,傳遞的音頻還要分長短。以下配置為:英文識別、REST 接口、15 秒以下音頻。

/// <summary>
/// Uploads an audio file to the Bing Speech REST recognition endpoint and prints the
/// HTTP status code and the raw response body to the console.
/// </summary>
public class VoiceHelper
{
    /// <summary>
    /// Sends the file for recognition and prints the outcome.
    /// Recognition mode: three modes are recognized — interactive, conversation and dictation.
    /// The mode adjusts speech recognition to how the user speaks; choose the one that fits
    /// your application. The URL used here selects dictation, language en-US, format simple.
    /// </summary>
    /// <param name="file">Path of the audio file to upload. The request declares it as PCM WAV at 16000 samples per second.</param>
    /// <param name="key">Value sent in the Ocp-Apim-Subscription-Key header.</param>
    public VoiceHelper(string file,string key)
    {
        HttpWebRequest recognitionRequest = BuildRequest(key);
        SendAudio(file, recognitionRequest);
        PrintResult(recognitionRequest);
    }

    // Creates the chunked POST request with the headers the endpoint expects.
    private static HttpWebRequest BuildRequest(string key)
    {
        string endpoint = "https://speech.platform.bing.com/speech/recognition/dictation/cognitiveservices/v1?language=en-US&format=simple";

        HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create(endpoint);
        req.SendChunked = true;
        req.Accept = @"application/json;text/xml";
        req.Method = "POST";
        req.ProtocolVersion = HttpVersion.Version11;
        req.ContentType = @"audio/wav; codec=audio/pcm; samplerate=16000";
        req.Headers["Ocp-Apim-Subscription-Key"] = key;
        return req;
    }

    // Copies the file into the request body in chunks of at most 1024 bytes.
    private static void SendAudio(string file, HttpWebRequest req)
    {
        using (FileStream audio = new FileStream(file, FileMode.Open, FileAccess.Read))
        using (Stream body = req.GetRequestStream())
        {
            int chunkSize = Math.Min(1024, (int)audio.Length);
            byte[] chunk = new Byte[checked((uint)chunkSize)];

            for (int count = audio.Read(chunk, 0, chunk.Length);
                 count != 0;
                 count = audio.Read(chunk, 0, chunk.Length))
            {
                body.Write(chunk, 0, count);
            }

            body.Flush();
        }
    }

    // Prints the status code, then the whole response body.
    private static void PrintResult(HttpWebRequest req)
    {
        using (WebResponse reply = req.GetResponse())
        {
            HttpWebResponse httpReply = (HttpWebResponse)reply;
            Console.WriteLine(httpReply.StatusCode);

            string text = string.Empty;
            using (StreamReader bodyReader = new StreamReader(reply.GetResponseStream()))
            {
                text = bodyReader.ReadToEnd();
            }

            Console.WriteLine(text);
        }
    }
}

 

 

  

 

 

// Recognize the audio file whose path is stored under the "Voice" app setting.
string voiceFilePath = ConfigurationManager.AppSettings["Voice"];
VoiceHelper voice = new VoiceHelper(voiceFilePath, "你的密鑰");

 

 

 

 

 這個語音識別還是可以的。返回值中的 DisplayText 就是我在音頻中說的話:重復了三遍 TEST。聲音很沙啞也很低沉,但識別率很贊。

不過要注意:只支持15秒以內、PCM 編碼、單聲道、16 kHz 的 WAV 文件。

以下是圖片識別.這個就可好玩了.我放了一個大飛機.返回的數據中.飛機藍天都識別了

 

/// <summary>
/// Posts a local image to the Microsoft Computer Vision "analyze" endpoint and writes the
/// formatted JSON response to the console.
/// </summary>
public class OCRHelper
{
    // Placeholder: replace with your own subscription key.
    const string subscriptionKey = "你的密鑰";

    const string uriBase = "https://westcentralus.api.cognitive.microsoft.com/vision/v1.0/analyze";

    // Request parameters. A third optional parameter is "details".
    const string requestParameters = "visualFeatures=Categories,Description,Color&language=en";

    // One HttpClient is shared by all requests instead of creating one per call.
    static readonly HttpClient client = new HttpClient();

    /// <summary>
    /// The request started by the constructor. Wait on or await this task to know when the
    /// response has been printed and to observe any failure.
    /// </summary>
    public System.Threading.Tasks.Task Completion { get; private set; }

    /// <summary>
    /// Prints the banner text and starts the analysis request for the given image.
    /// The constructor returns immediately; the response is printed when the request finishes.
    /// </summary>
    /// <param name="file">Path of the image file to analyze.</param>
    public OCRHelper(string file)
    {
        Console.WriteLine("Analyze an image:");
        Console.Write("Enter the path to an image you wish to analzye: ");

        // Execute the REST API call.
        Completion = MakeAnalysisRequest(file);

        Console.WriteLine("\nPlease wait a moment for the results to appear. Then, press Enter to exit...\n");
    }

    /// <summary>
    /// Gets the analysis of the specified image file by using the Computer Vision REST API.
    /// Failures are reported on the error stream and then rethrown so they surface through the returned task.
    /// </summary>
    /// <param name="imageFilePath">The image file.</param>
    static async System.Threading.Tasks.Task MakeAnalysisRequest(string imageFilePath)
    {
        try
        {
            // Assemble the URI for the REST API call.
            string uri = uriBase + "?" + requestParameters;

            // Request body: the raw bytes of the locally stored image.
            byte[] byteData = GetImageAsByteArray(imageFilePath);

            // Disposing the request message also disposes its content.
            using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Post, uri))
            {
                request.Headers.Add("Ocp-Apim-Subscription-Key", subscriptionKey);

                // This example uses content type "application/octet-stream".
                // The other content types you can use are "application/json" and "multipart/form-data".
                request.Content = new ByteArrayContent(byteData);
                request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");

                using (HttpResponseMessage response = await client.SendAsync(request).ConfigureAwait(false))
                {
                    string contentString = await response.Content.ReadAsStringAsync().ConfigureAwait(false);

                    // Display the JSON response.
                    // description.captions.text holds the English description of the image.
                    Console.WriteLine("\nResponse:\n");
                    Console.WriteLine(JsonPrettyPrint(contentString));
                }
            }
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine("Image analysis request failed: " + ex.Message);
            throw;
        }
    }


    /// <summary>
    /// Returns the contents of the specified file as a byte array.
    /// The file handle is released before returning.
    /// </summary>
    /// <param name="imageFilePath">The image file to read.</param>
    /// <returns>The byte array of the image data.</returns>
    static byte[] GetImageAsByteArray(string imageFilePath)
    {
        return File.ReadAllBytes(imageFilePath);
    }


    /// <summary>
    /// Formats the given JSON string by adding line breaks and indents (three spaces per level).
    /// Text inside string values is copied verbatim, including escaped quotes and apostrophes.
    /// </summary>
    /// <param name="json">The raw JSON string to format; null or empty yields an empty string.</param>
    /// <returns>The formatted JSON string.</returns>
    static string JsonPrettyPrint(string json)
    {
        if (string.IsNullOrEmpty(json))
            return string.Empty;

        json = json.Replace(Environment.NewLine, "").Replace("\t", "");

        StringBuilder sb = new StringBuilder();
        bool inString = false;   // currently between the quotes of a string value or key
        bool escaped = false;    // previous character inside the string was a backslash
        int offset = 0;          // current nesting depth
        const int indentLength = 3;

        foreach (char ch in json)
        {
            if (inString)
            {
                sb.Append(ch);
                if (escaped)
                    escaped = false;          // this character was escaped; it cannot end the string
                else if (ch == '\\')
                    escaped = true;
                else if (ch == '"')
                    inString = false;
                continue;
            }

            switch (ch)
            {
                case '"':
                    inString = true;
                    sb.Append(ch);
                    break;
                case '{':
                case '[':
                    sb.Append(ch);
                    sb.Append(Environment.NewLine);
                    sb.Append(new string(' ', ++offset * indentLength));
                    break;
                case '}':
                case ']':
                    // Never let the depth go negative on unbalanced input:
                    // a negative count would make the string constructor throw.
                    if (offset > 0) offset--;
                    sb.Append(Environment.NewLine);
                    sb.Append(new string(' ', offset * indentLength));
                    sb.Append(ch);
                    break;
                case ',':
                    sb.Append(ch);
                    sb.Append(Environment.NewLine);
                    sb.Append(new string(' ', offset * indentLength));
                    break;
                case ':':
                    sb.Append(ch);
                    sb.Append(' ');
                    break;
                default:
                    // Spaces outside strings are insignificant and are dropped.
                    if (ch != ' ') sb.Append(ch);
                    break;
            }
        }

        return sb.ToString().Trim();
    }
}

 

 

 

 OCRHelper ocr = new OCRHelper(@"C:\Users\Administrator\Desktop\test2.png");

 下圖是輸入參數

 

 

 下面是輸出參數

 

 以下是阿里的人工智能接口

 

   /// <summary>
        /// Posts a local image, Base64-encoded, to the Alibaba Cloud Market face-attribute API
        /// and prints the status code, method, headers and body of the response to the console.
        /// Prints a message and sends nothing when the image file cannot be found.
        /// </summary>
        /// <param name="file">Path of the image file to analyze.</param>
        private static void Face(string file)
        {
            String host = "http://rlsxsb.market.alicloudapi.com";
            String path = "/face/attribute";
            String method = "POST";
            // NOTE(review): credential committed in source — rotate it and load it from configuration instead.
            String appcode = "b009c20b62664344a794fe0a4535b2ab";

            string base64 = ImageHelper.ImgToBase64String(file);
            if (base64 == null)
            {
                // The helper returns null for a missing file; do not send an empty image.
                Console.WriteLine("Image file not found: " + file);
                return;
            }

            // Body fields: type 0 = recognize via image_url (must be non-empty);
            // type 1 = recognize via content (Base64-encoded image data, must be non-empty).
            // NOTE(review): the space appended after the Base64 data is kept from the original request — confirm the service needs or tolerates it.
            String bodys = "{\"type\":1,\"image_url\":\" \",\"content\":\"" + base64 + " \"}";
            String url = host + path;
            HttpWebRequest httpRequest = null;

            if (host.Contains("https://"))
            {
                // NOTE(review): this replaces certificate validation for the whole process with
                // CheckValidationResult (defined elsewhere) — confirm it does not accept every certificate.
                ServicePointManager.ServerCertificateValidationCallback = new RemoteCertificateValidationCallback(CheckValidationResult);
                httpRequest = (HttpWebRequest)WebRequest.CreateDefault(new Uri(url));
            }
            else
            {
                httpRequest = (HttpWebRequest)WebRequest.Create(url);
            }
            httpRequest.Method = method;
            httpRequest.Headers.Add("Authorization", "APPCODE " + appcode);
            // Set the Content-Type the API requires.
            httpRequest.ContentType = "application/json; charset=UTF-8";

            byte[] data = Encoding.UTF8.GetBytes(bodys);
            using (Stream stream = httpRequest.GetRequestStream())
            {
                stream.Write(data, 0, data.Length);
            }

            HttpWebResponse httpResponse = null;
            try
            {
                httpResponse = (HttpWebResponse)httpRequest.GetResponse();
            }
            catch (WebException ex)
            {
                // Error statuses still carry a response worth printing; transport failures
                // (DNS, connection refused, timeout) carry none.
                httpResponse = ex.Response as HttpWebResponse;
                if (httpResponse == null)
                {
                    Console.WriteLine("Request failed: " + ex.Message);
                    return;
                }
            }

            using (httpResponse)
            {
                Console.WriteLine(httpResponse.StatusCode);
                Console.WriteLine(httpResponse.Method);
                Console.WriteLine(httpResponse.Headers);
                using (Stream st = httpResponse.GetResponseStream())
                using (StreamReader reader = new StreamReader(st, Encoding.UTF8))
                {
                    string result = reader.ReadToEnd();
                    Console.WriteLine(result);
                    Console.WriteLine("\n");
                }
            }
        }

 

 

再放一個圖片轉 Base64 的幫助類

 

using System;
using System.Drawing;
using System.Drawing.Drawing2D;
using System.Drawing.Imaging;
using System.IO;

namespace Microsofot.Core
{
    /// <summary>
    /// Image helpers: Base64 conversion and thumbnail generation.
    /// </summary>
    public static class ImageHelper
    {
        /// <summary>
        /// Loads an image file, re-encodes it as JPEG and returns the JPEG bytes as Base64.
        /// The file is released before the method returns.
        /// </summary>
        /// <param name="filename">Path of the image file.</param>
        /// <returns>The Base64 text, or null when the path is blank or the file does not exist.</returns>
        public static string ImgToBase64String(string filename)
        {
            if (String.IsNullOrWhiteSpace(filename) || !File.Exists(filename))
            {
                return null;
            }

            // A Bitmap opened from a path keeps the file locked until it is disposed.
            using (Bitmap bmp = new Bitmap(filename))
            using (MemoryStream ms = new MemoryStream())
            {
                bmp.Save(ms, ImageFormat.Jpeg);
                return Convert.ToBase64String(ms.ToArray());
            }
        }

        /// <summary>
        /// Decodes Base64 image data into a bitmap that does not depend on any stream.
        /// </summary>
        /// <param name="base64Code">Base64-encoded image file content.</param>
        /// <returns>A new bitmap; the caller owns it and should dispose it.</returns>
        public static Bitmap Base64StringToImage(string base64Code)
        {
            byte[] arr = Convert.FromBase64String(base64Code);

            // A Bitmap created from a stream needs that stream for its whole lifetime, so the
            // decoded image is copied into a standalone bitmap before the stream is closed.
            using (MemoryStream ms = new MemoryStream(arr))
            using (Bitmap decoded = new Bitmap(ms))
            {
                return new Bitmap(decoded);
            }
        }

        /// <summary>
        /// Writes a JPEG thumbnail: the source image is scaled to fit inside a
        /// width x height canvas, centered on a white background.
        /// The source image is disposed by this method.
        /// </summary>
        /// <param name="img">Source image; disposed before the method returns.</param>
        /// <param name="thumbImagePath">Path the thumbnail is saved to.</param>
        /// <param name="width">Canvas width in pixels.</param>
        /// <param name="height">Canvas height in pixels.</param>
        public static void GenerateThumbImage(System.Drawing.Image img, string thumbImagePath, int width, int height)
        {
            if (img == null)
            {
                throw new ArgumentNullException("img");
            }

            try
            {
                // Source size in pixels.
                int ow = img.Width;
                int oh = img.Height;

                // Size of the scaled image inside the canvas.
                int towidth = width;
                int toheight = height;

                // Compare aspect ratios by cross-multiplication: when the source is relatively
                // wider than the canvas the width is the limiting side, otherwise the height is.
                // Deciding from the source orientation alone overflows non-square canvases.
                if ((long)ow * height > (long)oh * width)
                {
                    toheight = Math.Max(1, (int)((long)oh * width / ow));
                }
                else
                {
                    towidth = Math.Max(1, (int)((long)ow * height / oh));
                }

                using (Bitmap bm = new Bitmap(width, height))
                using (Graphics g = Graphics.FromImage(bm))
                {
                    // High-quality interpolation and smoothing.
                    g.InterpolationMode = InterpolationMode.High;
                    g.SmoothingMode = SmoothingMode.HighQuality;
                    // Fill the canvas with white.
                    g.Clear(Color.White);
                    // Draw the whole source image, scaled, centered on the canvas.
                    g.DrawImage(img, new Rectangle((width - towidth) / 2, (height - toheight) / 2, towidth, toheight),
                        0, 0, ow, oh,
                        GraphicsUnit.Pixel);

                    // Save the thumbnail as JPEG.
                    bm.Save(thumbImagePath, ImageFormat.Jpeg);
                }
            }
            finally
            {
                img.Dispose();
            }
        }
    }
}

  

 

 

 

 

   在此就不放阿里的語音識別接口了.

               因為

                     (至2018/1/10免費版本僅支持小量的調用,首先請手動將音頻文件放到阿里服務器上,然后將生成的URL拉下來作為發起識別的參數.並不能支持並發)

                               https://help.aliyun.com/document_detail/32378.html?spm=5176.product30413.3.4.IegjQU

 

               以下是百度的人工智能接口

                      在使用百度的接口時.需要先獲取Token

    /// <summary>
    /// Requests an OAuth access token (client-credentials grant) from the Baidu AI platform.
    /// </summary>
    public static class AccessToken
    {
        // The response of getAccessToken() includes expires_in; caching the token for that long is recommended.
        // Example token:
        //public static String TOKEN = "24.adda70c11b9786206253ddb70affdc46.2592000.1493524354.282335-1234567";

        // NOTE(review): both credentials below are committed in source — rotate them and load
        // them from configuration instead.
        // API Key of the Baidu Cloud application (enable all needed services when creating the application).
        private static String clientId = "bQWhFMDTvIZpHXr8ZYwT0r9d";
        // Secret Key of the Baidu Cloud application.
        private static String clientSecret = "EQGW33PLeYnWozRzafAcpiMBdxH8fLs2";

        // One HttpClient is shared by all calls instead of creating one per call.
        private static readonly HttpClient client = new HttpClient();

        /// <summary>
        /// Posts the client credentials to the token endpoint, prints the response body and returns it.
        /// The call blocks until the response has been read.
        /// </summary>
        /// <returns>The raw response body as received; this method does not extract the token from it.</returns>
        public static String getAccessToken()
        {
            String authHost = "https://aip.baidubce.com/oauth/2.0/token";
            List<KeyValuePair<String, String>> paraList = new List<KeyValuePair<string, string>>
            {
                new KeyValuePair<string, string>("grant_type", "client_credentials"),
                new KeyValuePair<string, string>("client_id", clientId),
                new KeyValuePair<string, string>("client_secret", clientSecret)
            };

            using (FormUrlEncodedContent form = new FormUrlEncodedContent(paraList))
            using (HttpResponseMessage response = client.PostAsync(authHost, form).Result)
            {
                String result = response.Content.ReadAsStringAsync().Result;
                Console.WriteLine(result);
                return result;
            }
        }
    }

 

  然后調用人臉識別

 /// <summary>
    /// Face detection through the Baidu AI REST API.
    /// </summary>
    public class FaceDetect
    {
        /// <summary>
        /// Uploads an image for face detection (up to 5 faces), prints the response and returns it.
        /// </summary>
        /// <param name="imageFile">Path of the image file; it is sent Base64- and URL-encoded.</param>
        /// <param name="token">Access token appended to the request URL.</param>
        /// <returns>The response body.</returns>
        public static string detect(string imageFile,string token)
        {
            string host = "https://aip.baidubce.com/rest/2.0/face/v1/detect?access_token=" + token;
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
            // HTTP method tokens are case-sensitive; send the canonical upper-case form.
            request.Method = "POST";
            request.KeepAlive = true;
            // The body below is a URL-encoded form; declare it as such.
            request.ContentType = "application/x-www-form-urlencoded";

            // Base64 encoding of the image.
            string base64 = ImageHelper.ImgToBase64String(imageFile);
            String str = "max_face_num=" + 5 + "&face_fields=" + "age,beauty,expression,faceshape,gender,glasses,landmark,race,qualities" + "&image=" + HttpUtility.UrlEncode(base64);

            byte[] buffer = Encoding.UTF8.GetBytes(str);
            request.ContentLength = buffer.Length;
            using (Stream body = request.GetRequestStream())
            {
                body.Write(buffer, 0, buffer.Length);
            }

            // JSON is decoded as UTF-8; the machine's ANSI code page would garble non-ASCII text.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                string result = reader.ReadToEnd();
                Console.WriteLine("人臉探測:");
                Console.WriteLine(result);
                return result;
            }
        }
    }

 

 

 

百度的語音識別就很簡單了.在NUGET上搜索baidu.ai安裝就行了

 

 

 // Client used by AsrData to recognize speech.
 // NOTE(review): Asr and Tts are presumably types from the Baidu.AI NuGet package the article mentions — confirm.
 private readonly Asr _asrClient;
        // Client created in the constructor; no use of it is visible in this snippet.
        private readonly Tts _ttsClient;

        /// <summary>
        /// Creates the two speech clients. Both credential pairs are placeholders:
        /// replace them with your own API key and secret key, preferably loaded from configuration.
        /// </summary>
        public SpeechDemo()
        {
            _asrClient = new Asr("你的KEY", "你的密鑰");
            // A real-looking secret used to be hard-coded here; it is now a placeholder like the one above.
            _ttsClient = new Tts("你的KEY", "你的密鑰");
        }

        /// <summary>
        /// Recognizes a local audio file and writes the recognition result to the console.
        /// </summary>
        /// <param name="file">Path of the audio file; its bytes are submitted with format "wav" and rate 16000.</param>
        public void AsrData(string file)
        {
            byte[] audioBytes = File.ReadAllBytes(file);
            Console.Write(_asrClient.Recognize(audioBytes, "wav", 16000));
        }

 

 

(至2018/01/09百度語音服務保持免費.但是在調試過程中同一段音頻.時而能識別.時而不能.並且存在漏詞現象.)

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM