Skip to content
This repository was archived by the owner on Jun 17, 2023. It is now read-only.

Commit c8d4cc9

Browse files
authored
Make group filtering safer for upsert searches (#517)
1 parent 166e326 commit c8d4cc9

4 files changed

Lines changed: 119 additions & 20 deletions

File tree

cif/httpd/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
VALID_FILTERS = {
88
'indicator', 'itype', 'confidence', 'provider', 'limit', 'application', 'nolog', 'tags', 'days',
99
'hours', 'groups', 'reporttime', 'cc', 'asn', 'asn_desc', 'rdata', 'firsttime', 'lasttime', 'region', 'id',
10-
'portlist', 'protocol', 'tlp', 'sort', 'group'
10+
'portlist', 'protocol', 'tlp', 'sort',
1111
}
1212
TOKEN_FILTERS = ['username', 'token']
1313

cif/store/zelasticsearch/filters.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -275,15 +275,7 @@ def filter_build(s, filters, token=None):
275275

276276
s = filter_reporttime(s, q_filters)
277277

278-
# transform all other filters into term=
279-
s = filter_terms(s, q_filters)
280-
281-
# indicator search/submit should mostly use singular 'group' field, but the cifsdk uses
282-
# groups (plural) for both indicators and tokens
283-
if q_filters.get('group'):
284-
q_filters['groups'] = q_filters.pop('group')
285-
s = filter_groups(s, q_filters)
286-
elif q_filters.get('groups'):
278+
if q_filters.get('groups'):
287279
s = filter_groups(s, q_filters)
288280
else:
289281
if token and (not token.get('admin') or token.get('admin') == ''):
@@ -292,4 +284,7 @@ def filter_build(s, filters, token=None):
292284
if q_filters.get('tags'):
293285
s = filter_tags(s, q_filters)
294286

287+
# transform all other filters into term=
288+
s = filter_terms(s, q_filters)
289+
295290
return s

cif/store/zelasticsearch/indicator.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -244,23 +244,25 @@ def upsert(self, token, indicators, flush=False):
244244
for d in agg:
245245
d = agg[d]
246246

247+
# start assembling search filters
247248
filters = {'limit': 1}
248249
for x in UPSERT_MATCH:
249250
if d.get(x):
250251
if x == 'confidence':
251252
filters[x] = '{},{}'.format(d[x], d[x])
253+
elif x == 'group':
254+
# indicator submit api expects 'group' (singular)
255+
# but search api expects 'groups' (plural)
256+
filters['groups'] = d[x]
257+
elif x == 'rdata':
258+
# if wildcard in rdata, don't add it to upsert search;
259+
# urls can contain asterisks, and complex wildcard queries can
260+
# create ES timeouts
261+
if '*' not in d['rdata']:
262+
filters[x] = d[x]
252263
else:
253264
filters[x] = d[x]
254265

255-
if d.get('tags'):
256-
filters['tags'] = d['tags']
257-
258-
if d.get('rdata'):
259-
# if wildcard in rdata, don't add it to upsert search; urls can contain asterisks,
260-
# and complex wildcard queries can create ES timeouts
261-
if '*' not in d['rdata']:
262-
filters['rdata'] = d['rdata']
263-
264266
# search for existing, return latest record
265267
try:
266268
# search the current index only

test/zelasticsearch/test_store_elasticsearch_indicators_upsert.py

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,19 @@ def indicator_diff_group():
183183
confidence=7.0
184184
)
185185

186+
@pytest.fixture
187+
def indicator_diff_rdata():
188+
return Indicator(
189+
indicator='example.com',
190+
tags='botnet',
191+
provider='csirtg.io',
192+
group='everyone',
193+
lasttime=arrow.utcnow().datetime,
194+
reporttime=arrow.utcnow().datetime,
195+
confidence=7.0,
196+
rdata='ns 10.1.1.1'
197+
)
198+
186199
@pytest.fixture
187200
def new_indicator():
188201
return Indicator(
@@ -510,4 +523,93 @@ def test_store_elasticsearch_indicators_upsert8(store, token, indicator, indicat
510523
assert i['count'] == 2
511524
# the indicator with group 'everyone2' should only have a count of 1
512525
else:
513-
assert i['count'] == 1
526+
assert i['count'] == 1
527+
528+
## test duplicate indicator submission, different rdata;
529+
# ensure upserts are NOT matching on diff rdata
530+
@pytest.mark.skipif(DISABLE_TESTS, reason='need to set CIF_ELASTICSEARCH_TEST=1 to run')
531+
def test_store_elasticsearch_indicators_upsert9(store, token, indicator, indicator_diff_rdata):
532+
533+
pprint(indicator)
534+
535+
indicator_dict = indicator.__dict__()
536+
537+
x = store.handle_indicators_create(token, indicator_dict, flush=True)
538+
assert x == 1
539+
540+
pprint(indicator_diff_group)
541+
542+
indicator_rdata_dict = indicator_diff_rdata.__dict__()
543+
544+
y = store.handle_indicators_create(token, indicator_rdata_dict, flush=True)
545+
assert y == 1
546+
547+
x = store.handle_indicators_search(token, {
548+
'indicator': 'example.com',
549+
'nolog': 1
550+
})
551+
552+
z = json.loads(x)
553+
z = [i['_source'] for i in z['hits']['hits']]
554+
555+
pprint(z)
556+
557+
assert len(z) == 2
558+
559+
# refresh 1st indicator times and resubmit to upsert/increase count
560+
# ensure it doesn't upsert into 2nd indicator (same indicator, but it additionally carries rdata)
561+
indicator_dict['lasttime'] = indicator_dict['reporttime'] = arrow.utcnow().datetime
562+
new_observation = Indicator(**indicator_dict)
563+
564+
x = store.handle_indicators_create(token, new_observation.__dict__(), flush=True)
565+
assert x == 1
566+
567+
y = store.handle_indicators_search(token, {
568+
'indicator': 'example.com',
569+
'nolog': 1
570+
})
571+
572+
z = json.loads(y)
573+
z = [i['_source'] for i in z['hits']['hits']]
574+
575+
assert len(z) == 2 # should still have 2 indicators, but should have upserted into 1st
576+
577+
pprint(z)
578+
579+
for i in z:
580+
# orig indicator (w/o rdata) should have upsert matched once for a total count of 2
581+
if not i.get('rdata'):
582+
assert i['count'] == 2
583+
# the indicator with rdata (different) should only have a count of 1
584+
else:
585+
assert i['count'] == 1
586+
587+
# refresh 2nd indicator times and resubmit to test upsert
588+
# ensure it doesn't upsert into 2nd indicator (that one has rdata, but the
589+
# new observation contains an asterisk which should be ignored)
590+
indicator_rdata_dict['lasttime'] = indicator_rdata_dict['reporttime'] = arrow.utcnow().datetime
591+
indicator_rdata_dict['rdata'] = 'some*test'
592+
new_rdata_observation = Indicator(**indicator_rdata_dict)
593+
594+
x = store.handle_indicators_create(token, new_rdata_observation.__dict__(), flush=True)
595+
assert x == 1
596+
597+
y = store.handle_indicators_search(token, {
598+
'indicator': 'example.com',
599+
'nolog': 1
600+
})
601+
602+
z = json.loads(y)
603+
z = [i['_source'] for i in z['hits']['hits']]
604+
605+
assert len(z) == 2 # should still have 2 indicators, but latest should have upserted into 1st
606+
607+
pprint(z)
608+
609+
for i in z:
610+
# orig indicator (w/o rdata) should have upsert matched twice now for a total count of 3
611+
if not i.get('rdata'):
612+
assert i['count'] == 3
613+
# the indicator with rdata (different) should only have a count of 1
614+
else:
615+
assert i['count'] == 1

0 commit comments

Comments
 (0)