算法-sklearn特征工程
2021/10/26 20:40:20
本文主要介绍算法-sklearn特征工程,对大家解决编程问题具有一定的参考价值,需要的程序猿们跟着小编一起来学习吧!
# Small scikit-learn feature-engineering demos: dict / text vectorization,
# feature scaling and missing-value imputation. Every demo fits a transformer
# on a tiny hard-coded sample and prints the result.

import jieba
import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Minimal CountVectorizer example, kept for reference:
#     # instantiate
#     vector = CountVectorizer()
#     # pass in the data that should be converted to one-hot encoding
#     res = vector.fit_transform(["life is short,i like python",
#                                 "life is too long,i dislike python"])
#     print(_feature_names(vector))
#     print(res.toarray())


def _feature_names(vectorizer):
    """Return the feature names learned by a fitted vectorizer as a list.

    ``get_feature_names()`` was deprecated in scikit-learn 1.0 and removed in
    1.2 in favour of ``get_feature_names_out()``. Prefer the new accessor and
    fall back to the legacy one on older releases. The result is converted to
    a plain ``list`` so the printed form is the same on either path.
    """
    try:
        names = vectorizer.get_feature_names_out()
    except AttributeError:
        # scikit-learn < 1.0 only ships the legacy accessor.
        names = vectorizer.get_feature_names()
    return list(names)


def dictvec():
    """Vectorize a list of dicts with ``DictVectorizer`` and print the result.

    Prints, in order: the learned feature names, the dicts recovered by
    ``inverse_transform``, and the dense feature matrix (``sparse=False``).
    """
    vectorizer = DictVectorizer(sparse=False)
    records = [
        {'city': '北京', 'temperature': 100},
        {'city': '上海', 'temperature': 60},
        {'city': '深圳', 'temperature': 30},
    ]
    data = vectorizer.fit_transform(records)
    print(_feature_names(vectorizer))
    print(vectorizer.inverse_transform(data))
    print(data)


def countvec():
    """Count tokens in two short mixed Chinese/English strings.

    Prints the vocabulary and then the fitted matrix as returned by
    ``fit_transform`` (it is not converted with ``toarray()`` here).
    """
    cv = CountVectorizer()
    data = cv.fit_transform(["人生 苦短,我 喜欢 python", "人生漫长,不用 python"])
    print(_feature_names(cv))
    print(data)


def cutword():
    """Segment three Chinese sentences with jieba.

    Returns:
        A 3-tuple of strings, one per sentence, in which the tokens produced
        by ``jieba.cut`` are joined with single spaces so that the
        scikit-learn text vectorizers can split them on whitespace.
    """
    sentences = (
        "今天很残酷,明天更残酷,后天很美好,但绝对大部分是死在明天晚上,所以每个人不要放弃今天。",
        "我们看到的从很远星系来的光是在几百万年之前发出的,这样当我们看到宇宙时,我们是在看它的过去。",
        "如果只用一种方式了解某样事物,你就不会真正了解它。了解事物真正含义的秘密取决于如何将其与我们所了解的事物相联系。",
    )
    c1, c2, c3 = (' '.join(jieba.cut(sentence)) for sentence in sentences)
    return c1, c2, c3


def hanzivec():
    """Print count features for the jieba-segmented sentences from ``cutword``.

    Prints the vocabulary followed by the dense document-term count matrix.
    """
    c1, c2, c3 = cutword()
    cv = CountVectorizer()
    data = cv.fit_transform([c1, c2, c3])
    print(_feature_names(cv))
    print(data.toarray())


def tfidfvec():
    """Print TF-IDF features for the jieba-segmented sentences from ``cutword``.

    Prints the vocabulary followed by the dense TF-IDF matrix.
    """
    c1, c2, c3 = cutword()
    tf = TfidfVectorizer()
    data = tf.fit_transform([c1, c2, c3])
    print(_feature_names(tf))
    print(data.toarray())


def mm():
    """Min-max scale a 3x4 sample with ``feature_range=(2, 3)`` and print it."""
    scaler = MinMaxScaler(feature_range=(2, 3))
    data = scaler.fit_transform(
        [[90, 2, 10, 40], [60, 4, 15, 45], [75, 3, 13, 46]]
    )
    print(data)


def stand():
    """Standardize a 3x3 sample with ``StandardScaler`` and print it."""
    scaler = StandardScaler()
    data = scaler.fit_transform(
        [[1., -1., 3.], [2., 4., 2.], [4., 6., -1.]]
    )
    print(data)


def im():
    """Impute the ``np.nan`` entry of a 3x2 sample and print the result.

    Uses ``SimpleImputer`` with its default settings.
    """
    imputer = SimpleImputer()
    data = imputer.fit_transform([[1, 2], [np.nan, 3], [7, 6]])
    print(data)


if __name__ == "__main__":
    # Uncomment a call to run that demo.
    # NOTE(review): in the collapsed original the line break fell right after
    # the "#" preceding stand(); it is treated as commented out here, leaving
    # im() as the only active call -- confirm against the original script.
    # dictvec()
    # countvec()
    # cutword()
    # hanzivec()
    # tfidfvec()
    # mm()
    # stand()
    im()
这篇关于算法-sklearn特征工程的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2024-11-02 Java管理系统项目实战入门教程
- 2024-11-02 Java监控系统项目实战教程
- 2024-11-02 Java就业项目项目实战:从入门到初级工程师的必备技能
- 2024-11-02 Java全端项目实战入门教程
- 2024-11-02 Java全栈项目实战:从入门到初级应用
- 2024-11-02 Java日志系统项目实战:初学者完全指南
- 2024-11-02 Java微服务系统项目实战入门教程
- 2024-11-02 Java微服务项目实战:新手入门指南
- 2024-11-02 Java项目实战:新手入门教程
- 2024-11-02 Java小程序项目实战:从入门到简单应用