Python社区  »  机器学习算法

文本分类的机器学习算法

marouane • 2 年前 • 185 次点击  
def TextClassification(a):    

    import pandas as pd
    df = pd.read_excel('../Desktop/Stage/Classeur1.xlsx')
    #Removing NULL element
    df=df[pd.notnull(df['Réclamation'])]
    df=df[pd.notnull(df['Catégorie'])]
 #Removing punctuation 
    df['Réclamation'] = [''.join(c for c in s if c not in string.punctuation) for s in df['Réclamation']]
    df['Catégorie'] = [''.join(c for c in s if c not in string.punctuation) for s in df['Catégorie']]
 #Removing é è and all numbers
    ch = ['0','1','2','3','4','5','6','7','8','9']

    for c in ch:
                df['Réclamation'] = [w.replace(c, '') for w in df['Réclamation']]
                df['Catégorie'] = [w.replace(c, '') for w in df['Catégorie']]


    df['Réclamation']= [w.replace('è', 'e') for w in df['Réclamation']]
    df['Réclamation']= [w.replace('é', 'e') for w in df['Réclamation']] 
    df['Catégorie']= [w.replace('è', 'e') for w in df['Catégorie']]
    df['Catégorie']= [w.replace('é', 'e') for w in df['Catégorie']]


#Lower case
    df['Réclamation']=df['Réclamation'].apply(lambda x: " ".join(x.lower() for x in x.split()))
    df['Catégorie']=df['Catégorie'].apply(lambda x: " ".join(x.lower() for x in x.split()))


    df['category_id'] = df['Catégorie'].factorize()[0]
    category_id_df = df[['Catégorie', 'category_id']].sort_values('category_id')
    category_to_id = dict(category_id_df.values)
    id_to_category = dict(category_id_df[['category_id', 'Catégorie']].values)
#library for data preprocessing and maodel building        
    from sklearn.model_selection import train_test_split
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.feature_extraction.text import TfidfTransformer
    from sklearn.svm import LinearSVC 
#split data(train/test)    
    X_train, X_test, y_train, y_test = train_test_split(df['Réclamation'], df['Catégorie'], random_state = 0)
    count_vect = CountVectorizer()
    X_train_counts = count_vect.fit_transform(X_train)
    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
    clf = LinearSVC().fit(X_train_tfidf, y_train)
    return(print(clf.predict(count_vect.transform(a)))

如何解决这个问题:我在最后一行的帮助中得到了eof,pb pls在哪里? 什么是EOF?

 File "<ipython-input-25-e6d13da1f205>", line 50
    return(print(clf.predict(count_vect.transform(a)))
                                                      ^
SyntaxError: unexpected EOF while parsing
Python社区是高质量的Python/Django开发社区
本文地址:http://www.python88.com/topic/30539
 
185 次点击  
分享到微博
文章 [ 1 ]  |  最新文章 2 年前
Dan D. Yogi
Reply   •   1 楼
Dan D. Yogi    2 年前

你错过了一个 )

return(print(clf.predict(count_vect.transform(a)))
                                                  ^