-
Notifications
You must be signed in to change notification settings - Fork 0
/
cooo.py
31 lines (30 loc) · 1.16 KB
/
cooo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 27 11:30:14 2019
@author: USER
"""
import sklearn
import random
from nltk import NaiveBayesClassifier
from nltk.classify import accuracy as nltk_accuracy
from nltk.corpus import names
def extract_features(word, N=2):
last_n_letters = word[-N:]
return {'feature': last_n_letters.lower()}
if __name__=='__main__':
male_list = [(name, 'male') for name in names.words('male.txt')]
female_list = [(name, 'female') for name in names.words('female.txt')]
data = (male_list + female_list)
random.seed(5)
random.shuffle(data)
namesInput = ['Rajesh', 'Gaurav', 'Swati', 'Shubha']
train_sample = int(0.8 * len(data))
for i in range(1, 6):
print('\nNumber of end letters:', i)
features = [(extract_features(n, i), gender) for (n, gender) in data]
train_data, test_data = features[:train_sample], features[train_sample:]
classifier = NaiveBayesClassifier.train(train_data)
accuracy_classifier = round(100 * nltk_accuracy(classifier, test_data), 2)
print('Accuracy = ' + str(accuracy_classifier) + '%')
for name in namesInput:
print(name, '==>', classifier.classify(extract_features(name, i)))