-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathexample.py
executable file
·83 lines (46 loc) · 2.34 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#! /usr/bin/env python
# -*- coding: utf8 -*-
import timbl
import os
#We are building a very simple context-aware translator / Word Sense Disambiguator for the word "bank", based on the occurrence of certain keywords in the same sentence:
# The features are binary and represent presence or absence of certain keywords. We choose:
# - money
# - sit
# - river
#Each feature has a value of 0 or 1 (but note that Timbl supports string features just as well!)
#The classes we predict are:
# - financial
# - furniture
# - geographic
# Build the training data for the classifier.
# "wsd-bank" will be the prefix of any files written for timbl; the second
# argument is the option string handed to the classifier.
classifier = timbl.TimblClassifier("wsd-bank", "-a 0 -k 1")

# One training instance per class. Each feature vector holds the binary
# keyword features in the order (money, sit, river).
training_instances = (
    ((1, 0, 0), 'financial'),
    ((0, 1, 0), 'furniture'),
    ((0, 0, 1), 'geographic'),
)
for features, sense in training_instances:
    classifier.append(features, sense)  # append() is used to add training instances

# Train the classifier on the appended instances, then save it.
classifier.train()
classifier.save()
# We start anew and load the classifier again (of course we could have just
# skipped this and the save step and continued immediately).
classifier = timbl.TimblClassifier("wsd-bank", "-a 0 -k 1")  # wsd-bank will be the prefix of any files written for timbl
classifier.load()  # even if this is omitted it will still work, the first classify() call will invoke load()

# Let's classify an instance. classify() is unpacked into the predicted class
# label, the class distribution and the distance.
classlabel, distribution, distance = classifier.classify((1, 0, 0))
if classlabel == "financial":
    print("Classified correctly! Our accuracy is " + str(classifier.getAccuracy()))

# Let's classify an ambiguous one (all three keywords present) and show the
# score of every class in the returned distribution.
winningclasslabel, distribution, distance = classifier.classify((1, 1, 1))
for classlabel, score in distribution.items():
    print(classlabel + ": " + str(score))
# Build a single string, like the other prints in this script: passing two
# arguments to print() would emit a tuple when run under a Python 2 interpreter.
print("Distance: " + str(distance))
# We again start anew and build a test file.
if os.path.exists("testfile"):  # delete if it already exists
    os.unlink("testfile")

classifier = timbl.TimblClassifier("wsd-bank", "-a 0 -k 1")
classifier.load()

# addinstance() can be used to add instances to external files (use append()
# for training). The gold labels must be spelled exactly like the classes used
# during training ('geographic'); a misspelled label can never match the
# classifier's output and would be counted as an error.
classifier.addinstance("testfile", (1, 0, 0), 'financial')
classifier.addinstance("testfile", (0, 1, 0), 'furniture')
classifier.addinstance("testfile", (0, 0, 1), 'geographic')
classifier.addinstance("testfile", (1, 1, 0), 'geographic')  # this one will be wrongly classified as financial & furniture

# Run the classifier over the test file and report the resulting accuracy.
classifier.test("testfile")
# Build a single string, like the other prints in this script: passing two
# arguments to print() would emit a tuple when run under a Python 2 interpreter.
print("Accuracy: " + str(classifier.getAccuracy()))