Skip to content

Commit ca422b5

Browse files
committed
Adding fast flow counter.
1 parent 39a78e2 commit ca422b5

File tree

2 files changed

+75
-0
lines changed

2 files changed

+75
-0
lines changed

python/cute/utils.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,55 @@ def print_usage():
113113
protocol_index, aggregator_index, accept_numerical_payload)
114114

115115
serialize_dataset(dataset)
116+
117+
class Trie(object):
    """Character trie that keeps a per-protocol hit counter on every node.

    Each node maps a single character to a child ``Trie`` and owns a
    ``{protocol: count}`` dictionary.  No end-of-term marker is stored, so
    every prefix of an added term is reachable (and countable) as well.
    """

    def __init__(self):
        # Child nodes, keyed by the next character of a term.
        self._trie_dict = dict()
        # Hit counters for the string ending at this node, keyed by protocol.
        self._protocol_count = dict()

    def add(self, term):
        """Insert ``term``, creating one node per character as needed.

        An empty ``term`` is a no-op.  The walk is iterative so that long
        terms neither hit the interpreter recursion limit nor pay for
        repeated ``term[1:]`` copies.
        """
        node = self
        for ch in term:
            child = node._trie_dict.get(ch)
            if child is None:
                child = Trie()
                node._trie_dict[ch] = child
            node = child

    def contains(self, term):
        """Return the node reached by following ``term``, or ``False``.

        The empty term resolves to this node itself.  Because no
        end-of-term marker exists, any prefix of an added term matches.
        """
        node = self
        for ch in term:
            node = node._trie_dict.get(ch)
            if node is None:
                return False
        return node

    def inc_if_contains(self, term, protocol):
        """Increment the ``protocol`` counter of ``term`` if it is present.

        Returns ``True`` when a counter was incremented and ``False`` when
        ``term`` is not in the trie.
        """
        node = self.contains(term)
        if node is False:
            return False

        counts = node._protocol_count
        counts[protocol] = counts.get(protocol, 0) + 1
        return True

    def get_protocol_counts(self, term):
        """Return the ``{protocol: count}`` mapping recorded for ``term``.

        The returned mapping is the node's own dictionary (not a copy) when
        ``term`` is present, and a fresh empty dictionary otherwise.
        """
        node = self.contains(term)
        return node._protocol_count if node else dict()

    def get_protocol_count(self, term, protocol):
        """Return the count of ``protocol`` for ``term``.

        Returns ``None`` when ``term`` is absent or ``protocol`` was never
        counted for it.
        """
        return self.get_protocol_counts(term).get(protocol)

    def print_protocol_counts(self, prefix=''):
        """Print ``protocol|count|string`` for every counted node.

        ``prefix`` is the string that leads to this node; children are
        visited recursively with their character appended to it.
        """
        for protocol, count in self._protocol_count.items():
            print('%s|%d|%s' % (protocol, count, prefix))

        for ch, trie in self._trie_dict.items():
            trie.print_protocol_counts(prefix + ch)
167+

python/fastcounter.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import sys
2+
import cute.utils
3+
4+
def strip_newline(line):
    """Return ``line`` without its single trailing ``'\\n'``, if it has one."""
    # Only drop a real newline: the last line of a file or stream may lack
    # one, and blindly cutting the last character would truncate the data.
    return line[:-1] if line.endswith('\n') else line


def parse_flow(flow):
    """Split one ``<protocol>|<payload>`` input line into its two fields.

    The split happens at the first ``'|'``; later ones stay in the payload.
    Returns a ``(protocol, payload)`` tuple, or ``None`` when the line
    contains no ``'|'`` separator.
    """
    protocol, separator, payload = strip_newline(flow).partition('|')
    if not separator:
        return None
    return protocol, payload


def main(argv=None):
    """Count, per protocol, the payload substrings that are in the term trie.

    Reads terms (one per line) from the file named by ``argv[1]``, then
    reads ``<protocol>|<payload>`` lines from standard input.  Every
    payload substring of at least ``argv[2]`` characters is offered to the
    trie, and the resulting counts are printed.

    Returns the process exit status: 0 on success, 1 on bad usage.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        print('fastcounter <term_file> <length_threshold>')
        return 1

    term_file = argv[1]
    length_threshold = int(argv[2])

    trie = cute.utils.Trie()
    with open(term_file) as terms:
        for term in terms:
            trie.add(strip_newline(term))

    for flow in sys.stdin:
        parsed = parse_flow(flow)
        if parsed is None:
            # Line has no protocol separator; nothing meaningful to count.
            continue

        protocol, payload = parsed
        # Enumerate every substring payload[i:j] of length >= threshold.
        for i in range(0, len(payload) - length_threshold + 1):
            for j in range(i + length_threshold, len(payload) + 1):
                trie.inc_if_contains(payload[i:j], protocol)

    trie.print_protocol_counts()
    return 0


if __name__ == '__main__':
    sys.exit(main())

0 commit comments

Comments
 (0)