新Alpha前端页面


生词分析接口

<p><strong>简要描述:</strong> </p> <ul> <li>生词分析接口</li> </ul> <p><strong>底层:</strong> 底层调用函数:analyzeArticleData 底层函数路径为:PandoraSearch\SearchAndRecommentdation\SentenceSegmentation\SentTokenization6.py 底层函数返回结果格式:</p> <pre><code>入参:[{'p':u'这些路段正在施工,请绕行!还有这些信息,一定要看'},{'s':u'秋天养球根花卉,不注意这5点开花难;做好这4点,花开艳丽满屋香'}] { 'kw_smap': { u '秋天': [('para2_0', 1.0)], u '满屋': [('para2_1', 0.23846153846153847)], u '球根': [('para2_0', 0.8252941176470587)] }, 'kw_dict': { u '秋天': { 'adj_score': 1.0, 'w': 1.0, 'freq': 1, 'txt': u '秋天', 'type': 'normal', 'nw': 1 }, u '满屋': { 'pkey': u 'c41_Uksjrzfo', 'adj_score': 0.23846153846153847, 'w': 292.0, 'freq': 1, 'txt': u '满屋', 'type': 'normal', 'nw': 0 }, u '球根': { 'pkey': u 'c77_MwyTca2C', 'adj_score': 0.8252941176470587, 'w': 241.0, 'freq': 1, 'txt': u '球根', 'type': 'normal', 'nw': 0 } }, 'sent_info': [{ 'id': 'para1_0', 'mr': u '这些||路段||正在||施工||,||请||绕行||!||还有||这些||信息||,||一定||要||看' }, { 'id': 'para2_0', 'mr': u '秋天||养||球根||花卉||,||不||注意||这||5||点||开花||难' }, { 'id': 'para2_1', 'mr': u '做好||这||4||点||,||花||开||艳丽||满屋||香' }], 'articleInfo': { 'ccomp': { u 'c41': 1, u 'c77': 1 }, 'new_words': [(u '秋天', 1)], 'all_tags': [(u '秋天', 1.0), (u '球根', 0.8252941176470587), (u '满屋', 0.23846153846153847)] } }</code></pre> <p><strong>本接口请求URL:</strong> </p> <ul> <li><code>/api/v3/xadmin/objects/new_words_analyze/</code></li> </ul> <p><strong>请求方式:</strong></p> <ul> <li>post</li> </ul> <p><strong>参数:</strong> </p> <table> <thead> <tr> <th style="text-align: left;">参数名</th> <th style="text-align: left;">必选</th> <th style="text-align: left;">类型</th> <th>说明</th> </tr> </thead> <tbody> <tr> <td style="text-align: left;">target_text</td> <td style="text-align: left;">是</td> <td style="text-align: left;">list</td> <td>匹配的文本列表,是列表内套字典形式,字典的键名自拟,值为待匹配的文本</td> </tr> <tr> <td style="text-align: left;">only_res</td> <td style="text-align: left;">是</td> <td style="text-align: left;">int</td> <td>1代表 只返回分析结果, 0代表 结果写入数据库</td> </tr> <tr> <td style="text-align: left;">db_host</td> 
<td style="text-align: left;">否</td> <td style="text-align: left;">str</td> <td>数据库host</td> </tr> <tr> <td style="text-align: left;">db_user</td> <td style="text-align: left;">否</td> <td style="text-align: left;">str</td> <td>数据库用户名</td> </tr> <tr> <td style="text-align: left;">db_pwd</td> <td style="text-align: left;">否</td> <td style="text-align: left;">str</td> <td>数据库密码</td> </tr> <tr> <td style="text-align: left;">db_port</td> <td style="text-align: left;">否</td> <td style="text-align: left;">str</td> <td>数据库端口</td> </tr> <tr> <td style="text-align: left;">db_name</td> <td style="text-align: left;">否</td> <td style="text-align: left;">str</td> <td>数据库的库名</td> </tr> <tr> <td style="text-align: left;">tb_name</td> <td style="text-align: left;">否</td> <td style="text-align: left;">str</td> <td>生词表名</td> </tr> <tr> <td style="text-align: left;">sent_tb_name</td> <td style="text-align: left;">否</td> <td style="text-align: left;">str</td> <td>句子表名</td> </tr> <tr> <td style="text-align: left;">min_freq</td> <td style="text-align: left;">否</td> <td style="text-align: left;">int</td> <td>最小频度,指定返回文本的最小频度</td> </tr> <tr> <td style="text-align: left;">each_len</td> <td style="text-align: left;">否</td> <td style="text-align: left;">int</td> <td>切词字数上限,默认8000</td> </tr> <tr> <td style="text-align: left;">convert_time</td> <td style="text-align: left;">否</td> <td style="text-align: left;">bool</td> <td>是否进行时间格式转换,默认为False(不转换)</td> </tr> </tbody> </table> <p><strong> 注: 当only_res为0时,db_host,db_user,db_pwd,db_port,db_name,tb_name,sent_tb_name全部不允许为空,且数据库必须已存在;接口会自动创建两张数据表:一个是生词表,一个是句子表 </strong></p> <p><strong>入参示例</strong> 入参: target_text:[{'p':'秋天养球根花卉,不注意这5点开花难;做好这4点,花开艳丽满屋香'},{'s':&quot;圆肩显胖没商量?看看马思纯、蒋欣穿搭术让圆肩不显胖&quot;}]</p> <p>成功结果:</p> <pre><code>{ "status": 1, "message": "success", "resultObj": { "sent_info": [ { "input_key": "p", "second_cut_sent": "秋天||养||球根||花卉||,||不||注意||这5点开花难||;||做好||这||4||点||,||花||开||艳丽||满屋||香", # 最终切词结果 "input_text": "秋天养球根花卉,不注意这5点开花难;做好这4点,花开艳丽满屋香", # 
原文本 "id": "para1_0", "mr": "秋天||养||球根||花卉||,||不||注意||这||5||点||开花||难||;||做好||这||4||点||,||花||开||艳丽||满屋||香" }, { "input_key": "s", "second_cut_sent": "圆肩||显胖||没商量||?||看看||马思纯||、||蒋欣||穿||搭术||让||圆||肩不显||胖", "input_text": "圆肩显胖没商量?看看马思纯、蒋欣穿搭术让圆肩不显胖", "id": "para2_0", "mr": "圆肩||显胖||没商量||?||看看||马思纯||、||蒋欣||穿||搭术||让||圆||肩不显||胖" } ], "words_info": { ",花开艳丽满屋": { "concept": "", "adj_score": 0.26548387096774195, "weight": 1.0, "text": ",花开艳丽满屋", "is_new": 1, "obj_key": "", "alias": "", "frequency": 1, "cname": "", "type": null }, "蒋欣": { "concept": "演员", # 主键对应的概念 "adj_score": 0.4456, "weight": 9768.0, # 权重 "text": "蒋欣", # 文本 "is_new": 0, # 是否新词,1新词,0非新词 "obj_key": "c54_o82DZA3E", # 主键 "alias": "", "frequency": 1, # 频度 "cname": "", "type": "normal" # 类型 }, "没商量": { "concept": "", "adj_score": 0.8416, "weight": 1.0, "text": "没商量", "is_new": 1, "obj_key": "", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "艳丽": { "concept": "成语", "adj_score": 0.1696774193548387, "weight": 651.0, "text": "艳丽", "is_new": 0, "obj_key": "c317_oImNS46o", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "这5点开花难": { "concept": "", "adj_score": 0.6487096774193548, "weight": 1.0, "text": "这5点开花难", "is_new": 1, "obj_key": "", "alias": "", "frequency": 1, "cname": "", "type": null }, "马思纯": { "concept": "演员", "adj_score": 0.604, "weight": 5859.0, "text": "马思纯", "is_new": 0, "obj_key": "c54_X7pFJdak", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "看看": { "concept": "语言", "adj_score": 0.6832, "weight": 526.0, "text": "看看", "is_new": 0, "obj_key": "c70_3NOnG46v", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "秋天": { "concept": "", "adj_score": 1.0, "weight": 1.0, "text": "秋天", "is_new": 1, "obj_key": "", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "满屋": { "concept": "科学", "adj_score": 0.10580645161290322, "weight": 472.0, "text": "满屋", "is_new": 0, "obj_key": "c41_Uksjrzfo", "alias": "", "frequency": 1, "cname": "", "type": 
"normal" }, "显胖": { "concept": "语言", "adj_score": 0.9208000000000001, "weight": 570.0, "text": "显胖", "is_new": 0, "obj_key": "c70_urtztEVx", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "做好": { "concept": "语言", "adj_score": 0.42516129032258065, "weight": 570.0, "text": "做好", "is_new": 0, "obj_key": "c70_xSa1nEVt", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "球根": { "concept": "植物", "adj_score": 0.9041935483870968, "weight": 351.0, "text": "球根", "is_new": 0, "obj_key": "c77_MwyTca2C", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "开花": { "concept": "语言", "adj_score": 0.5529032258064516, "weight": 4635.0, "text": "开花", "is_new": 0, "obj_key": "c70_FmQKmFNP", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "注意": { "concept": "语言", "adj_score": 0.7125806451612904, "weight": 1449.0, "text": "注意", "is_new": 0, "obj_key": "c70_vrvz2EBt", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "点,花开艳丽满屋": { "concept": "", "adj_score": 0.2974193548387097, "weight": 1.0, "text": "点,花开艳丽满屋", "is_new": 1, "obj_key": "", "alias": "", "frequency": 1, "cname": "", "type": null }, "圆肩": { "concept": "", "adj_score": 1.0, "weight": 100, "text": "圆肩", "is_new": 1, "obj_key": "", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "花卉": { "concept": "语言", "adj_score": 0.8403225806451613, "weight": 9999999.0, "text": "花卉", "is_new": 0, "obj_key": "c70_qItGYd2K", "alias": "", "frequency": 1, "cname": "植物", "type": "normal" }, "搭术": { "concept": "", "adj_score": 0.3268, "weight": 100, "text": "搭术", "is_new": 1, "obj_key": "", "alias": "", "frequency": 1, "cname": "", "type": "normal" }, "肩不显": { "concept": "", "adj_score": 0.1684, "weight": 100, "text": "肩不显", "is_new": 1, "obj_key": "", "alias": "", "frequency": 1, "cname": "", "type": "normal" } }, "extra_info": 
"quote_keywords:\n\ntime_keywords:\n\nnumber_keywords:\n5,4\nfiltered_words_msg:\nword:,,freq:2\n,花开艳丽满屋,7,0.5\t,不注意这5点,7,0.5\t,不注意这5,6,0.5\t,花开艳丽,5,0.5\t,不注意这,5,0.5\t,不注意,4,0.5\t,花开,3,0.5\t,不,2,0.5\t,花,2,0.5\nword:点,freq:2\n点,花开艳丽满屋,8,0.5\t点开花难;做好这,8,0.5\t点开花难;做好,7,0.5\t点,花开艳丽,6,0.5\t点开花难,4,0.5\t点,花开,4,0.5\t点开花,3,0.5\t点,花,3,0.5\nword:这,freq:2\n这5点开花难,6,0.5\t这4点,花开,6,0.5\t这5点开花,5,0.5\t这4点,花,5,0.5\t这5点,3,0.5\t这4点,3,0.5\t这5,2,0.5\t这4,2,0.5" } }</code></pre> <p>失败结果如下</p> <pre><code>{ "status": 2, "message": { "target_text": [ "target_text内的元素必须是字典" ] }, "resultObj": {} }</code></pre> <pre><code>{ "status": 2, "message": { "tb_name": [ "tb_name can not be null" ], "sent_tb_name": [ "sent_tb_name can not be null" ], "db_user": [ "db_user can not be null" ], "db_pwd": [ "db_pwd can not be null" ], "db_port": [ "db_port can not be null" ], "db_name": [ "db_name can not be null" ], "db_host": [ "db_host can not be null" ] }, "resultObj": {} }</code></pre> <ul> <li>写入生词表中的格式</li> </ul> <table> <thead> <tr> <th style="text-align: left;">字段</th> <th style="text-align: left;">类型</th> <th style="text-align: left;">是否可为空</th> <th>索引</th> <th>注释</th> </tr> </thead> <tbody> <tr> <td style="text-align: left;">id</td> <td style="text-align: left;">int</td> <td style="text-align: left;">否</td> <td>是</td> <td>自增id</td> </tr> <tr> <td style="text-align: left;">match_text</td> <td style="text-align: left;">varchar(255)</td> <td style="text-align: left;">否</td> <td>否</td> <td>文本</td> </tr> <tr> <td style="text-align: left;">obj_key</td> <td style="text-align: left;">varchar(50)</td> <td style="text-align: left;">是</td> <td>否</td> <td>主键</td> </tr> <tr> <td style="text-align: left;">frequency</td> <td style="text-align: left;">int</td> <td style="text-align: left;">是</td> <td>是</td> <td>频度</td> </tr> <tr> <td style="text-align: left;">is_new</td> <td style="text-align: left;">int</td> <td style="text-align: left;">是</td> <td>是</td> <td>是否新词,0为非新词,1为新词</td> </tr> <tr> <td 
style="text-align: left;">type</td> <td style="text-align: left;">varchar(50)</td> <td style="text-align: left;">是</td> <td>否</td> <td>类型</td> </tr> <tr> <td style="text-align: left;">adj_score</td> <td style="text-align: left;">float</td> <td style="text-align: left;">是</td> <td>否</td> <td>分值</td> </tr> <tr> <td style="text-align: left;">weight</td> <td style="text-align: left;">float</td> <td style="text-align: left;">是</td> <td>否</td> <td>权重</td> </tr> <tr> <td style="text-align: left;">concept</td> <td style="text-align: left;">varchar(50)</td> <td style="text-align: left;">是</td> <td>否</td> <td>主键对应的概念</td> </tr> <tr> <td style="text-align: left;">alias</td> <td style="text-align: left;">varchar(255)</td> <td style="text-align: left;">是</td> <td>否</td> <td></td> </tr> <tr> <td style="text-align: left;">cname</td> <td style="text-align: left;">varchar(50)</td> <td style="text-align: left;">是</td> <td>否</td> <td></td> </tr> </tbody> </table> <ul> <li>写入句子表中的格式</li> </ul> <table> <thead> <tr> <th style="text-align: left;">字段</th> <th style="text-align: left;">类型</th> <th style="text-align: left;">是否可为空</th> <th>索引</th> <th>注释</th> </tr> </thead> <tbody> <tr> <td style="text-align: left;">id</td> <td style="text-align: left;">int</td> <td style="text-align: left;">否</td> <td>是</td> <td>自增id</td> </tr> <tr> <td style="text-align: left;">input_key</td> <td style="text-align: left;">varchar(255)</td> <td style="text-align: left;">是</td> <td>是</td> <td>输入的键</td> </tr> <tr> <td style="text-align: left;">sent</td> <td style="text-align: left;">longtext</td> <td style="text-align: left;">是</td> <td>否</td> <td>输入的文本</td> </tr> <tr> <td style="text-align: left;">cut_sent</td> <td style="text-align: left;">longtext</td> <td style="text-align: left;">是</td> <td>否</td> <td>切分后的文本</td> </tr> <tr> <td style="text-align: left;">cut_words</td> <td style="text-align: left;">longtext</td> <td style="text-align: left;">是</td> <td>否</td> <td>二次切分后的文本</td> </tr> <tr> <td 
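style="text-align: left;">para_id</td> <td style="text-align: left;">varchar(255)</td> <td style="text-align: left;">是</td> <td>否</td> <td>文本在列表中的位置</td> </tr> </tbody> </table> <p><strong>返回参数说明</strong> </p> <table> <thead> <tr> <th style="text-align: left;">参数名</th> <th style="text-align: left;">类型</th> <th>说明</th> </tr> </thead> <tbody> <tr> <td style="text-align: left;">status</td> <td style="text-align: left;">int</td> <td>状态码,上文示例中成功为1,参数校验失败为2</td> </tr> <tr> <td style="text-align: left;">message</td> <td style="text-align: left;">str / dict</td> <td>成功时为"success";失败时为字典,键为出错的参数名,值为错误信息列表</td> </tr> <tr> <td style="text-align: left;">resultObj</td> <td style="text-align: left;">dict</td> <td>分析结果,失败时为空字典{}</td> </tr> <tr> <td style="text-align: left;">resultObj.sent_info</td> <td style="text-align: left;">list</td> <td>句子信息列表,每个元素包含input_key(输入的键),input_text(原文本),id,mr,second_cut_sent(最终切词结果)</td> </tr> <tr> <td style="text-align: left;">resultObj.words_info</td> <td style="text-align: left;">dict</td> <td>词信息字典,键为词文本,值包含text(文本),weight(权重),frequency(频度),is_new(是否新词,1新词,0非新词),obj_key(主键),concept(主键对应的概念),type(类型),adj_score(分值),alias,cname</td> </tr> <tr> <td style="text-align: left;">resultObj.extra_info</td> <td style="text-align: left;">str</td> <td>附加信息文本,上文示例中包含quote_keywords,time_keywords,number_keywords,filtered_words_msg等部分</td> </tr> </tbody> </table> <ul> <li>备注:无</li> </ul>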

页面列表

ITEM_HTML