UGC内容审核设计步骤
2021/6/19 23:26:46
本文主要是介绍UGC内容审核设计步骤,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
1. 检测标题与内容是否相关
当相似度返回值 > 0 时才算作通过(即去掉符号后,标题与内容至少存在一个公共字符)
import java.text.NumberFormat;
import java.util.Locale;

/**
 * Scores how closely two strings are related by comparing the length of their
 * longest common subsequence with the length of the longer string.
 *
 * <p>Only Chinese characters (U+4E00..U+9FA5), ASCII letters and ASCII digits take
 * part in the comparison; every other character is dropped first.
 */
public class Compute {

    public static void main(String[] args) {
        String content = "最近公司由于业务拓展,需要进行小程序相关的开发,本着朝全栈开发者努力,决定学习下Vue,去年csdn送了一本《Vue.js权威指南》";
        String title = "VueVueVue";
        double ss = SimilarDegree(content, title);
        System.out.println(ss);
    }

    /**
     * Computes the similarity of two strings.
     *
     * @param strA first string (must not be null)
     * @param strB second string (must not be null)
     * @return length of the longest common subsequence divided by the length of the
     *         longer filtered string, in the range [0, 1]; {@code 0.0} when neither
     *         string contains any comparable character
     */
    public static double SimilarDegree(String strA, String strB) {
        String newStrA = removeSign(strA);
        String newStrB = removeSign(strB);
        // The longer filtered string is the denominator, the common part the numerator.
        int longer = Math.max(newStrA.length(), newStrB.length());
        if (longer == 0) {
            // Nothing comparable on either side: avoid 0/0 (NaN) and report "no similarity".
            return 0.0;
        }
        int common = longestCommonSubstring(newStrA, newStrB).length();
        return common * 1.0 / longer;
    }

    /**
     * Strips every character that is not a Chinese character, ASCII letter or ASCII digit.
     *
     * @param str input string (must not be null)
     * @return the retained characters, in their original order
     */
    public static String removeSign(String str) {
        StringBuilder sb = new StringBuilder(str.length());
        for (char item : str.toCharArray()) {
            if (charReg(item)) {
                sb.append(item);
            }
        }
        return sb.toString();
    }

    /**
     * Tells whether a character takes part in the similarity comparison.
     * Punctuation and other symbols carry no meaning for the comparison and are excluded.
     *
     * @param charValue character to test
     * @return {@code true} for U+4E00..U+9FA5, a-z, A-Z and 0-9
     */
    public static boolean charReg(char charValue) {
        return (charValue >= 0x4E00 && charValue <= 0X9FA5)
                || (charValue >= 'a' && charValue <= 'z')
                || (charValue >= 'A' && charValue <= 'Z')
                || (charValue >= '0' && charValue <= '9');
    }

    /**
     * Finds a longest common subsequence of two strings with dynamic programming.
     *
     * <p>Despite the method name, the matched characters do not have to be contiguous
     * in either input.
     *
     * @param strA first string (must not be null)
     * @param strB second string (must not be null)
     * @return one longest common subsequence; empty when the strings share no character
     */
    public static String longestCommonSubstring(String strA, String strB) {
        char[] chars_strA = strA.toCharArray();
        char[] chars_strB = strB.toCharArray();
        int m = chars_strA.length;
        int n = chars_strB.length;

        // matrix[i][j] = LCS length of the first i chars of strA and the first j chars of strB.
        // Row 0 and column 0 stay 0. On a character match the cell is the upper-left cell + 1,
        // otherwise it is the larger of the left and the upper neighbour.
        int[][] matrix = new int[m + 1][n + 1];
        for (int i = 1; i <= m; i++) {
            for (int j = 1; j <= n; j++) {
                if (chars_strA[i - 1] == chars_strB[j - 1]) {
                    matrix[i][j] = matrix[i - 1][j - 1] + 1;
                } else {
                    matrix[i][j] = Math.max(matrix[i][j - 1], matrix[i - 1][j]);
                }
            }
        }

        // Walk back from the bottom-right corner. A cell that differs from both its left and
        // its upper neighbour can only have been produced by a character match, so that
        // character belongs to the result.
        char[] result = new char[matrix[m][n]];
        int currentIndex = result.length - 1;
        while (matrix[m][n] != 0) {
            if (matrix[m][n] == matrix[m][n - 1]) {
                // Compare cell values; comparing the rows themselves (matrix[n] == matrix[n - 1])
                // is a reference comparison and indexes rows with a column number.
                n--;
            } else if (matrix[m][n] == matrix[m - 1][n]) {
                m--;
            } else {
                result[currentIndex] = chars_strA[m - 1];
                currentIndex--;
                n--;
                m--;
            }
        }
        return new String(result);
    }

    /**
     * Formats a ratio as a percentage string using US conventions.
     *
     * @param resule ratio to format, e.g. {@code 0.5}
     * @return the percentage text, e.g. {@code "50%"}
     */
    public static String similarityResult(double resule) {
        return NumberFormat.getPercentInstance(Locale.US).format(resule);
    }
}
2. 阿里云内容审核
内容安全_云盾_违规内容识别_安全-阿里云 (aliyun.com)
文字审核
package com.heima.common.aliyun; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import com.aliyuncs.DefaultAcsClient; import com.aliyuncs.IAcsClient; import com.aliyuncs.exceptions.ClientException; import com.aliyuncs.green.model.v20180509.TextScanRequest; import com.aliyuncs.http.FormatType; import com.aliyuncs.http.HttpResponse; import com.aliyuncs.profile.DefaultProfile; import com.aliyuncs.profile.IClientProfile; import lombok.Getter; import lombok.Setter; import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.PropertySource; import java.util.*; @Getter @Setter @Configuration @ConfigurationProperties(prefix="aliyun") @PropertySource("classpath:aliyun.properties") public class AliyunTextScanRequest { private String accessKey; private String secret; public String textScanRequest(String content) throws Exception { IClientProfile profile = DefaultProfile.getProfile("cn-shanghai", accessKey, secret); IAcsClient client = new DefaultAcsClient(profile); TextScanRequest textScanRequest = new TextScanRequest(); textScanRequest.setAcceptFormat(FormatType.JSON); // 指定api返回格式 textScanRequest.setHttpContentType(FormatType.JSON); textScanRequest.setMethod(com.aliyuncs.http.MethodType.POST); // 指定请求方法 textScanRequest.setEncoding("UTF-8"); textScanRequest.setRegionId("cn-shanghai"); List<Map<String, Object>> tasks = new ArrayList<Map<String, Object>>(); Map<String, Object> task1 = new LinkedHashMap<String, Object>(); task1.put("dataId", UUID.randomUUID().toString()); /** * 待检测的文本,长度不超过10000个字符 */ task1.put("content", content); tasks.add(task1); JSONObject data = new JSONObject(); /** * 检测场景,文本垃圾检测传递:antispam **/ data.put("scenes", Arrays.asList("antispam")); data.put("tasks", tasks); System.out.println(JSON.toJSONString(data, true)); 
textScanRequest.setHttpContent(data.toJSONString().getBytes("UTF-8"), "UTF-8", FormatType.JSON); // 请务必设置超时时间 textScanRequest.setConnectTimeout(3000); textScanRequest.setReadTimeout(6000); try { HttpResponse httpResponse = client.doAction(textScanRequest); if(httpResponse.isSuccess()){ JSONObject scrResponse = JSON.parseObject(new String(httpResponse.getHttpContent(), "UTF-8")); System.out.println(JSON.toJSONString(scrResponse, true)); if (200 == scrResponse.getInteger("code")) { JSONArray taskResults = scrResponse.getJSONArray("data"); for (Object taskResult : taskResults) { if(200 == ((JSONObject)taskResult).getInteger("code")){ JSONArray sceneResults = ((JSONObject)taskResult).getJSONArray("results"); for (Object sceneResult : sceneResults) { String scene = ((JSONObject)sceneResult).getString("scene"); String suggestion = ((JSONObject)sceneResult).getString("suggestion"); //根据scene和suggetion做相关处理 //suggestion == pass 未命中垃圾 review 人工审核 suggestion == block 命中了垃圾,可以通过label字段查看命中的垃圾分类 System.out.println("args = [" + scene + "]"); System.out.println("args = [" + suggestion + "]"); return suggestion; } }else{ System.out.println("task process fail:" + ((JSONObject)taskResult).getInteger("code")); } } } else { System.out.println("detect not success. code:" + scrResponse.getInteger("code")); } }else{ System.out.println("response not success. status:" + httpResponse.getStatus()); } } catch (ClientException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } return null; } }
图片审核
package com.heima.common.aliyun; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import com.aliyuncs.DefaultAcsClient; import com.aliyuncs.IAcsClient; import com.aliyuncs.green.model.v20180509.ImageSyncScanRequest; import com.aliyuncs.http.FormatType; import com.aliyuncs.http.HttpResponse; import com.aliyuncs.http.MethodType; import com.aliyuncs.http.ProtocolType; import com.aliyuncs.profile.DefaultProfile; import com.aliyuncs.profile.IClientProfile; import lombok.Getter; import lombok.Setter; import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.PropertySource; import java.util.*; @Getter @Setter @Configuration @ConfigurationProperties(prefix="aliyun") @PropertySource("classpath:aliyun.properties") public class AliyunImageScanRequest { private String accessKey; private String secret; public String imageScanRequest(List<String> images) throws Exception { IClientProfile profile = DefaultProfile.getProfile("cn-shanghai",accessKey,secret); DefaultProfile.addEndpoint("cn-shanghai", "cn-shanghai", "Green", "green.cn-shanghai.aliyuncs.com"); IAcsClient client = new DefaultAcsClient(profile); ImageSyncScanRequest imageSyncScanRequest = new ImageSyncScanRequest(); // 指定api返回格式 imageSyncScanRequest.setAcceptFormat(FormatType.JSON); // 指定请求方法 imageSyncScanRequest.setMethod(MethodType.POST); imageSyncScanRequest.setEncoding("utf-8"); //支持http和https imageSyncScanRequest.setProtocol(ProtocolType.HTTP); JSONObject httpBody = new JSONObject(); /** * 设置要检测的场景, 计费是按照该处传递的场景进行 * 一次请求中可以同时检测多张图片,每张图片可以同时检测多个风险场景,计费按照场景计算 * 例如:检测2张图片,场景传递porn,terrorism,计费会按照2张图片鉴黄,2张图片暴恐检测计算 * porn: porn表示色情场景检测 */ httpBody.put("scenes", Arrays.asList("logo","porn","ad","terrorism")); /** * 设置待检测图片, 一张图片一个task, * 多张图片同时检测时,处理的时间由最后一个处理完的图片决定。 * 通常情况下批量检测的平均rt比单张检测的要长, 一次批量提交的图片数越多,rt被拉长的概率越高 * 这里以单张图片检测作为示例, 
如果是批量图片检测,请自行构建多个task */ List<JSONObject> list = new ArrayList<JSONObject>(); for (String image : images) { JSONObject task = new JSONObject(); task.put("dataId", UUID.randomUUID().toString()); //设置图片链接为上传后的url task.put("url", image); task.put("time", new Date()); list.add(task); } httpBody.put("tasks", list); imageSyncScanRequest.setHttpContent(org.apache.commons.codec.binary.StringUtils.getBytesUtf8(httpBody.toJSONString()), "UTF-8", FormatType.JSON); /** * 请设置超时时间, 服务端全链路处理超时时间为10秒,请做相应设置 * 如果您设置的ReadTimeout 小于服务端处理的时间,程序中会获得一个read timeout 异常 */ imageSyncScanRequest.setConnectTimeout(3000); imageSyncScanRequest.setReadTimeout(10000); HttpResponse httpResponse = null; try { httpResponse = client.doAction(imageSyncScanRequest); } catch (Exception e) { e.printStackTrace(); } //服务端接收到请求,并完成处理返回的结果 if (httpResponse != null && httpResponse.isSuccess()) { JSONObject scrResponse = JSON.parseObject(org.apache.commons.codec.binary.StringUtils.newStringUtf8(httpResponse.getHttpContent())); System.out.println(JSON.toJSONString(scrResponse, true)); int requestCode = scrResponse.getIntValue("code"); //每一张图片的检测结果 JSONArray taskResults = scrResponse.getJSONArray("data"); if (200 == requestCode) { for (Object taskResult : taskResults) { //单张图片的处理结果 int taskCode = ((JSONObject) taskResult).getIntValue("code"); //图片要检测的场景的处理结果, 如果是多个场景,则会有每个场景的结果 JSONArray sceneResults = ((JSONObject) taskResult).getJSONArray("results"); if (200 == taskCode) { for (Object sceneResult : sceneResults) { String scene = ((JSONObject) sceneResult).getString("scene"); String suggestion = ((JSONObject) sceneResult).getString("suggestion"); //根据scene和suggetion做相关处理 //do something //pass 通过 review 人工审核 block 不通过 System.out.println("scene = [" + scene + "]"); System.out.println("suggestion = [" + suggestion + "]"); return suggestion; } } else { //单张图片处理失败, 原因是具体的情况详细分析 System.out.println("task process fail. 
task response:" + JSON.toJSONString(taskResult)); } } } else { /** * 表明请求整体处理失败,原因视具体的情况详细分析 */ System.out.println("the whole image scan request failed. response:" + JSON.toJSONString(scrResponse)); } } return null; } }
依次进行 文本审核 / 图片审核
当有一项的审核结果为 review(机器无法确定)时, 就进行人工审核,
否则就是全自动的啦
我刚才在阿里云上看了下价格, 真的是好贵
这篇关于UGC内容审核设计步骤的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2024-11-26RocketMQ入门指南:搭建与使用全流程详解
- 2024-11-26RocketMQ入门教程:轻松搭建与使用指南
- 2024-11-26手写RocketMQ:从入门到实践的简单教程
- 2024-11-25【机器学习(二)】分类和回归任务-决策树(Decision Tree,DT)算法-Sentosa_DSML社区版
- 2024-11-23增量更新怎么做?-icode9专业技术文章分享
- 2024-11-23压缩包加密方案有哪些?-icode9专业技术文章分享
- 2024-11-23用shell怎么写一个开机时自动同步远程仓库的代码?-icode9专业技术文章分享
- 2024-11-23webman可以同步自己的仓库吗?-icode9专业技术文章分享
- 2024-11-23在 Webman 中怎么判断是否有某命令进程正在运行?-icode9专业技术文章分享
- 2024-11-23如何重置new Swiper?-icode9专业技术文章分享