Commit | Line | Data |
---|---|---|
15637ed4 RG |
1 | /* |
2 | * Copyright (c) 1990 The Regents of the University of California. | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that: (1) source code distributions | |
7 | * retain the above copyright notice and this paragraph in its entirety, (2) | |
8 | * distributions including binary code include the above copyright notice and | |
9 | * this paragraph in its entirety in the documentation or other materials | |
10 | * provided with the distribution, and (3) all advertising materials mentioning | |
11 | * features or use of this software display the following acknowledgement: | |
12 | * ``This product includes software developed by the University of California, | |
13 | * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of | |
14 | * the University nor the names of its contributors may be used to endorse | |
15 | * or promote products derived from this software without specific prior | |
16 | * written permission. | |
17 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED | |
18 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF | |
19 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
20 | */ | |
21 | #ifndef lint | |
22 | static char rcsid[] = | |
23 | "@(#)$Header: search.c,v 1.3 92/05/01 15:14:45 vern Exp $ (LBL)"; | |
24 | #endif | |
25 | ||
26 | /* | |
27 | * search.c - supports fast searching through tcpdump files for timestamps | |
28 | */ | |
29 | ||
30 | #include <stdio.h> | |
31 | #include <sys/types.h> | |
32 | #include <sys/time.h> | |
33 | ||
34 | #include "interface.h" | |
35 | #include "savefile.h" | |
36 | ||
37 | ||
38 | /* Maximum number of seconds that we can conceive of a dump file spanning. */ | |
39 | #define MAX_REASONABLE_FILE_SPAN (3600*24*366) /* one year */ | |
40 | ||
41 | /* Maximum packet length we ever expect to see. */ | |
42 | #define MAX_REASONABLE_PACKET_LENGTH 65535 | |
43 | ||
44 | /* Size of a packet header in bytes; easier than typing the sizeof() all | |
45 | * the time ... | |
46 | */ | |
47 | #define PACKET_HDR_LEN (sizeof( struct packet_header )) | |
48 | ||
49 | /* The maximum size of a packet, including its header. */ | |
50 | #define MAX_PACKET_SIZE (PACKET_HDR_LEN + snaplen) | |
51 | ||
52 | /* Number of contiguous bytes from a dumpfile in which there's guaranteed | |
53 | * to be enough information to find a "definite" header if one exists | |
54 | * therein. This takes 3 full packets - the first to be just misaligned | |
55 | * (one byte short of a full packet), missing its timestamp; the second | |
56 | * to have the legitimate timestamp; and the third to provide confirmation | |
57 | * that the second is legit, making it a "definite" header. We could | |
58 | * scrimp a bit here since not the entire third packet is required, but | |
59 | * it doesn't seem worth it | |
60 | */ | |
61 | #define MAX_BYTES_FOR_DEFINITE_HEADER (3 * MAX_PACKET_SIZE) | |
62 | ||
63 | /* Maximum number of seconds that might reasonably separate two headers. */ | |
64 | #define MAX_REASONABLE_HDR_SEPARATION (3600 * 24 * 7) /* one week */ | |
65 | ||
66 | /* When searching a file for a packet, if we think we're within this many | |
67 | * bytes of the packet we just search linearly. Since linear searches are | |
68 | * probably much faster than random ones (random ones require searching for | |
69 | * the beginning of the packet, which may be unaligned in memory), we make | |
70 | * this value pretty hefty. | |
71 | */ | |
72 | #define STRAIGHT_SCAN_THRESHOLD (100 * MAX_PACKET_SIZE) | |
73 | ||
74 | /* Extracts a long integer from a possibly unaligned buffer containing | |
75 | * unsigned characters. | |
76 | */ | |
77 | #define EXTRACT_LONG(buf) (buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]) | |
78 | ||
79 | ||
80 | /* Given a header and an acceptable first and last time stamp, returns non-zero | |
81 | * if the header looks reasonable and zero otherwise. | |
82 | */ | |
83 | static int reasonable_header( hdr, first_time, last_time ) | |
84 | struct packet_header *hdr; | |
85 | long first_time, last_time; | |
86 | { | |
87 | if ( last_time == 0 ) | |
88 | last_time = first_time + MAX_REASONABLE_FILE_SPAN; | |
89 | ||
90 | return hdr->ts.tv_sec >= first_time && | |
91 | hdr->ts.tv_sec <= last_time && | |
92 | hdr->len > 0 && | |
93 | hdr->len <= MAX_REASONABLE_PACKET_LENGTH && | |
94 | hdr->caplen > 0 && | |
95 | hdr->caplen <= MAX_REASONABLE_PACKET_LENGTH; | |
96 | } | |
97 | ||
98 | ||
99 | /* Given a buffer, extracts a (properly aligned) packet header from it. */ | |
100 | ||
101 | static void extract_header( buf, hdr ) | |
102 | u_char *buf; | |
103 | struct packet_header *hdr; | |
104 | { | |
105 | hdr->ts.tv_sec = EXTRACT_LONG(buf); | |
106 | buf += sizeof( long ); | |
107 | hdr->ts.tv_usec = EXTRACT_LONG(buf); | |
108 | buf += sizeof( long ); | |
109 | hdr->len = EXTRACT_LONG(buf); | |
110 | buf += sizeof( long ); | |
111 | hdr->caplen = EXTRACT_LONG(buf); | |
112 | ||
113 | if ( sf_swapped ) | |
114 | { | |
115 | hdr->ts.tv_sec = SWAPLONG(hdr->ts.tv_sec); | |
116 | hdr->ts.tv_usec = SWAPLONG(hdr->ts.tv_usec); | |
117 | hdr->len = SWAPLONG(hdr->len); | |
118 | hdr->caplen = SWAPLONG(hdr->caplen); | |
119 | } | |
120 | } | |
121 | ||
122 | ||
123 | /* Search a buffer to locate the first header within it. Return values | |
124 | * are HEADER_NONE, HEADER_CLASH, HEADER_PERHAPS, and HEADER_DEFINITELY. | |
125 | * The first indicates that no evidence of a header was found; the second | |
126 | * that two or more possible headers were found, neither more convincing | |
127 | * than the other(s); the third that exactly one "possible" header was | |
128 | * found; and the fourth that exactly one "definite" header was found. | |
129 | * | |
130 | * Headers are detected by looking for positions in the buffer which have | |
131 | * reasonable timestamps and lengths. If there is enough room in the buffer | |
132 | * for another header to follow a candidate header, a check is made for | |
133 | * that following header. If it is present then the header is *definite* | |
134 | * (unless another "perhaps" or "definite" header is found); if not, then | |
135 | * the header is discarded. If there is not enough room in the buffer for | |
136 | * another header then the candidate is *perhaps* (unless another header | |
137 | * is subsequently found). A "tie" between a "definite" header and a | |
138 | * "perhaps" header is resolved in favor of the definite header. Any | |
139 | * other tie leads to HEADER_CLASH. | |
140 | * | |
141 | * The buffer position of the header is returned in hdrpos_addr and | |
142 | * for convenience the corresponding header in return_hdr. | |
143 | * | |
144 | * first_time is the earliest possible acceptable timestamp in the | |
145 | * header. last_time, if non-zero, is the last such timestamp. If | |
146 | * zero, then up to MAX_REASONABLE_FILE_SPAN seconds after first_time | |
147 | * is acceptable. | |
148 | */ | |
149 | ||
150 | #define HEADER_NONE 0 | |
151 | #define HEADER_CLASH 1 | |
152 | #define HEADER_PERHAPS 2 | |
153 | #define HEADER_DEFINITELY 3 | |
154 | ||
155 | static int find_header( buf, buf_len, first_time, last_time, | |
156 | hdrpos_addr, return_hdr ) | |
157 | u_char *buf; | |
158 | unsigned buf_len; | |
159 | long first_time, last_time; | |
160 | u_char **hdrpos_addr; | |
161 | struct packet_header *return_hdr; | |
162 | { | |
163 | u_char *bufptr, *bufend, *last_pos_to_try; | |
164 | struct packet_header hdr, hdr2; | |
165 | int status = HEADER_NONE; | |
166 | int saw_PERHAPS_clash = 0; | |
167 | ||
168 | /* Initially, try each buffer position to see whether it looks like | |
169 | * a valid packet header. We may later restrict the positions we look | |
170 | * at to avoid seeing a sequence of legitimate headers as conflicting | |
171 | * with one another. | |
172 | */ | |
173 | bufend = buf + buf_len; | |
174 | last_pos_to_try = bufend - PACKET_HDR_LEN; | |
175 | ||
176 | for ( bufptr = buf; bufptr < last_pos_to_try; ++bufptr ) | |
177 | { | |
178 | extract_header( bufptr, &hdr ); | |
179 | ||
180 | if ( reasonable_header( &hdr, first_time, last_time ) ) | |
181 | { | |
182 | u_char *next_header = bufptr + PACKET_HDR_LEN + hdr.caplen; | |
183 | ||
184 | if ( next_header + PACKET_HDR_LEN < bufend ) | |
185 | { /* check for another good header */ | |
186 | extract_header( next_header, &hdr2 ); | |
187 | ||
188 | if ( reasonable_header( &hdr2, hdr.ts.tv_sec, | |
189 | hdr.ts.tv_sec + MAX_REASONABLE_HDR_SEPARATION ) ) | |
190 | { /* a confirmed header */ | |
191 | switch ( status ) | |
192 | { | |
193 | case HEADER_NONE: | |
194 | case HEADER_PERHAPS: | |
195 | status = HEADER_DEFINITELY; | |
196 | *hdrpos_addr = bufptr; | |
197 | *return_hdr = hdr; | |
198 | ||
199 | /* Make sure we don't demote this "definite" | |
200 | * to a "clash" if we stumble across its | |
201 | * successor. | |
202 | */ | |
203 | last_pos_to_try = next_header - PACKET_HDR_LEN; | |
204 | break; | |
205 | ||
206 | case HEADER_DEFINITELY: | |
207 | return HEADER_CLASH; | |
208 | ||
209 | default: | |
210 | error( "bad status in find_header()" ); | |
211 | } | |
212 | } | |
213 | ||
214 | /* ... else the header is bogus - we've verified that it's | |
215 | * not followed by a reasonable header. | |
216 | */ | |
217 | } | |
218 | ||
219 | else | |
220 | { /* can't check for another good header */ | |
221 | switch ( status ) | |
222 | { | |
223 | case HEADER_NONE: | |
224 | status = HEADER_PERHAPS; | |
225 | *hdrpos_addr = bufptr; | |
226 | *return_hdr = hdr; | |
227 | break; | |
228 | ||
229 | case HEADER_PERHAPS: | |
230 | /* We don't immediately turn this into a | |
231 | * clash because perhaps we'll later see a | |
232 | * "definite" which will save us ... | |
233 | */ | |
234 | saw_PERHAPS_clash = 1; | |
235 | break; | |
236 | ||
237 | case HEADER_DEFINITELY: | |
238 | /* Keep the definite in preference to this one. */ | |
239 | break; | |
240 | ||
241 | default: | |
242 | error( "bad status in find_header()" ); | |
243 | } | |
244 | } | |
245 | } | |
246 | } | |
247 | ||
248 | if ( status == HEADER_PERHAPS && saw_PERHAPS_clash ) | |
249 | status = HEADER_CLASH; | |
250 | ||
251 | return status; | |
252 | } | |
253 | ||
254 | ||
255 | /* Positions the sf_readfile stream such that the next sf_read() will | |
256 | * read the final full packet in the file. Returns non-zero if | |
257 | * successful, zero if unsuccessful. If successful, returns the | |
258 | * timestamp of the last packet in last_timestamp. | |
259 | * | |
260 | * Note that this routine is a special case of sf_find_packet(). In | |
261 | * order to use sf_find_packet(), one first must use this routine in | |
262 | * order to give sf_find_packet() an upper bound on the timestamps | |
263 | * present in the dump file. | |
264 | */ | |
265 | int sf_find_end( first_timestamp, last_timestamp ) | |
266 | struct timeval *first_timestamp; | |
267 | struct timeval *last_timestamp; | |
268 | { | |
269 | long first_time = first_timestamp->tv_sec; | |
270 | unsigned num_bytes; | |
271 | u_char *buf, *bufpos, *bufend; | |
272 | u_char *hdrpos; | |
273 | struct packet_header hdr, successor_hdr; | |
274 | int status; | |
275 | ||
276 | /* Allow enough room for at least two full (untruncated) packets, | |
277 | * perhaps followed by a truncated packet, so we have a shot at | |
278 | * finding a "definite" header and following its chain to the | |
279 | * end of the file. | |
280 | */ | |
281 | num_bytes = MAX_BYTES_FOR_DEFINITE_HEADER; | |
282 | if ( fseek( sf_readfile, (long) -num_bytes, 2 ) < 0 ) | |
283 | return 0; | |
284 | ||
285 | buf = (u_char *)malloc((unsigned) num_bytes); | |
286 | if ( ! buf ) | |
287 | return 0; | |
288 | ||
289 | status = 0; | |
290 | bufpos = buf; | |
291 | bufend = buf + num_bytes; | |
292 | ||
293 | if ( fread( (char *) bufpos, num_bytes, 1, sf_readfile ) != 1 ) | |
294 | goto done; | |
295 | ||
296 | if ( find_header( bufpos, num_bytes, first_time, 0L, &hdrpos, &hdr ) != | |
297 | HEADER_DEFINITELY ) | |
298 | goto done; | |
299 | ||
300 | /* Okay, we have a definite header in our hands. Follow its | |
301 | * chain till we find the last valid packet in the file ... | |
302 | */ | |
303 | for ( ; ; ) | |
304 | { | |
305 | /* move to the next header position */ | |
306 | bufpos = hdrpos + PACKET_HDR_LEN + hdr.caplen; | |
307 | ||
308 | /* bufpos now points to a candidate packet, which if valid | |
309 | * should replace the current packet pointed to by hdrpos as | |
310 | * the last valid packet ... | |
311 | */ | |
312 | if ( bufpos >= bufend - PACKET_HDR_LEN ) | |
313 | /* not enough room for another header */ | |
314 | break; | |
315 | ||
316 | extract_header( bufpos, &successor_hdr ); | |
317 | ||
318 | first_time = hdr.ts.tv_sec; | |
319 | if ( ! reasonable_header( &successor_hdr, first_time, 0L ) ) | |
320 | /* this bodes ill - it means bufpos is perhaps a | |
321 | * bogus packet header after all ... | |
322 | */ | |
323 | break; | |
324 | ||
325 | /* Note that the following test is for whether the next | |
326 | * packet starts at a position > bufend, *not* for a | |
327 | * position >= bufend. If this is the last packet in the | |
328 | * file and there isn't a subsequent partial packet, then | |
329 | * we expect the first buffer position beyond this packet | |
330 | * to be just beyond the end of the buffer, i.e., at bufend | |
331 | * itself. | |
332 | */ | |
333 | if ( bufpos + PACKET_HDR_LEN + successor_hdr.caplen > bufend ) | |
334 | /* the packet is truncated */ | |
335 | break; | |
336 | ||
337 | /* Accept this packet as fully legit. */ | |
338 | hdrpos = bufpos; | |
339 | hdr = successor_hdr; | |
340 | } | |
341 | ||
342 | /* Success! Last valid packet is at hdrpos. */ | |
343 | *last_timestamp = hdr.ts; | |
344 | status = 1; | |
345 | ||
346 | /* Seek so that the next read will start at last valid packet. */ | |
347 | if ( fseek( sf_readfile, (long) -(bufend - hdrpos), 2 ) < 0 ) | |
348 | error( "final fseek() failed in sf_find_end()" ); | |
349 | ||
350 | done: | |
351 | free( (char *) buf ); | |
352 | ||
353 | return status; | |
354 | } | |
355 | ||
356 | ||
357 | /* Takes two timeval's and returns the difference, tv2 - tv1, as a double. */ | |
358 | ||
359 | static double timeval_diff( tv1, tv2 ) | |
360 | struct timeval *tv1, *tv2; | |
361 | { | |
362 | double result = (tv2->tv_sec - tv1->tv_sec); | |
363 | result += (tv2->tv_usec - tv1->tv_usec) / 1000000.0; | |
364 | ||
365 | return result; | |
366 | } | |
367 | ||
368 | ||
369 | /* Returns true if timestamp t1 is chronologically less than timestamp t2. */ | |
370 | ||
371 | int sf_timestamp_less_than( t1, t2 ) | |
372 | struct timeval *t1, *t2; | |
373 | { | |
374 | return t1->tv_sec < t2->tv_sec || | |
375 | (t1->tv_sec == t2->tv_sec && | |
376 | t1->tv_usec < t2->tv_usec); | |
377 | } | |
378 | ||
379 | ||
380 | /* Given two timestamps on either side of desired_time and their positions, | |
381 | * returns the interpolated position of the desired_time packet. Returns a | |
382 | * negative value if the desired_time is outside the given range. | |
383 | */ | |
384 | ||
385 | static | |
386 | long interpolated_position( min_time, min_pos, max_time, max_pos, desired_time ) | |
387 | struct timeval *min_time, *max_time, *desired_time; | |
388 | long min_pos, max_pos; | |
389 | { | |
390 | double full_span = timeval_diff( max_time, min_time ); | |
391 | double desired_span = timeval_diff( desired_time, min_time ); | |
392 | long full_span_pos = max_pos - min_pos; | |
393 | double fractional_offset = desired_span / full_span; | |
394 | ||
395 | if ( fractional_offset < 0.0 || fractional_offset > 1.0 ) | |
396 | return -1; | |
397 | ||
398 | return min_pos + (long) (fractional_offset * (double) full_span_pos); | |
399 | } | |
400 | ||
401 | ||
402 | /* Reads packets linearly until one with a time >= the given desired time | |
403 | * is found; positions the dump file so that the next read will start | |
404 | * at the given packet. Returns non-zero on success, 0 if an EOF was | |
405 | * first encountered. | |
406 | */ | |
407 | ||
408 | static int read_up_to( desired_time ) | |
409 | struct timeval *desired_time; | |
410 | { | |
411 | int status = 1; | |
412 | struct packet_header hdr; | |
413 | u_char *buf; | |
414 | long pos; | |
415 | ||
416 | buf = (u_char *) malloc( (unsigned) snaplen ); | |
417 | ||
418 | for ( ; ; ) | |
419 | { | |
420 | struct timeval *timestamp; | |
421 | ||
422 | pos = ftell( sf_readfile ); | |
423 | status = sf_next_packet( &hdr, buf, snaplen ); | |
424 | ||
425 | if ( status ) | |
426 | { | |
427 | if ( status == SFERR_EOF ) | |
428 | { | |
429 | status = 0; | |
430 | break; | |
431 | } | |
432 | ||
433 | error( "bad status %d in read_up_to()", status ); | |
434 | } | |
435 | ||
436 | timestamp = &hdr.ts; | |
437 | ||
438 | if ( ! sf_timestamp_less_than( timestamp, desired_time ) ) | |
439 | break; | |
440 | } | |
441 | ||
442 | if ( fseek( sf_readfile, pos, 0 ) < 0 ) | |
443 | error( "fseek() failed in read_up_to()" ); | |
444 | ||
445 | free( (char *) buf ); | |
446 | ||
447 | return status; | |
448 | } | |
449 | ||
450 | ||
451 | /* Positions the sf_readfile stream so that the next sf_read() will | |
452 | * return the first packet with a time greater than or equal to | |
453 | * desired_time. desired_time must be greater than min_time and less | |
454 | * than max_time, which should correspond to actual packets in the | |
455 | * file. min_pos is the file position (byte offset) corresponding to | |
456 | * the min_time packet and max_pos is the same for the max_time packet. | |
457 | * | |
458 | * Returns non-zero on success, 0 if the given position is beyond max_pos. | |
459 | * | |
460 | * NOTE: when calling this routine, the sf_readfile stream *must* be | |
461 | * already aligned so that the next call to sf_next_packet() will yield | |
462 | * a valid packet. | |
463 | */ | |
464 | ||
465 | int sf_find_packet( min_time, min_pos, max_time, max_pos, desired_time ) | |
466 | struct timeval *min_time, *max_time; | |
467 | long min_pos, max_pos; | |
468 | struct timeval *desired_time; | |
469 | { | |
470 | int status = 1; | |
471 | struct timeval min_time_copy, max_time_copy; | |
472 | unsigned num_bytes = MAX_BYTES_FOR_DEFINITE_HEADER; | |
473 | int num_bytes_read; | |
474 | long desired_pos, present_pos; | |
475 | u_char *buf, *hdrpos; | |
476 | struct packet_header hdr; | |
477 | ||
478 | buf = (u_char *) malloc( num_bytes ); | |
479 | if ( ! buf ) | |
480 | error( "malloc() failured in sf_find_packet()" ); | |
481 | ||
482 | min_time_copy = *min_time; | |
483 | min_time = &min_time_copy; | |
484 | ||
485 | max_time_copy = *max_time; | |
486 | max_time = &max_time_copy; | |
487 | ||
488 | for ( ; ; ) /* loop until positioned correctly */ | |
489 | { | |
490 | desired_pos = | |
491 | interpolated_position( min_time, min_pos, | |
492 | max_time, max_pos, | |
493 | desired_time ); | |
494 | ||
495 | if ( desired_pos < 0 ) | |
496 | { | |
497 | status = 0; | |
498 | break; | |
499 | } | |
500 | ||
501 | present_pos = ftell( sf_readfile ); | |
502 | ||
503 | if ( present_pos <= desired_pos && | |
504 | desired_pos - present_pos < STRAIGHT_SCAN_THRESHOLD ) | |
505 | { /* we're close enough to just blindly read ahead */ | |
506 | status = read_up_to( desired_time ); | |
507 | break; | |
508 | } | |
509 | ||
510 | /* Undershoot the target a little bit - it's much easier to | |
511 | * then scan straight forward than to try to read backwards ... | |
512 | */ | |
513 | desired_pos -= STRAIGHT_SCAN_THRESHOLD / 2; | |
514 | if ( desired_pos < min_pos ) | |
515 | desired_pos = min_pos; | |
516 | ||
517 | if ( fseek( sf_readfile, desired_pos, 0 ) < 0 ) | |
518 | error( "fseek() failed in sf_find_packet()" ); | |
519 | ||
520 | num_bytes_read = | |
521 | fread( (char *) buf, 1, num_bytes, sf_readfile ); | |
522 | ||
523 | if ( num_bytes_read == 0 ) | |
524 | /* This shouldn't ever happen because we try to | |
525 | * undershoot, unless the dump file has only a | |
526 | * couple packets in it ... | |
527 | */ | |
528 | error( "fread() failed in sf_find_packet()" ); | |
529 | ||
530 | if ( find_header( buf, num_bytes, min_time->tv_sec, | |
531 | max_time->tv_sec, &hdrpos, &hdr ) != | |
532 | HEADER_DEFINITELY ) | |
533 | error( "can't find header at position %ld in dump file", | |
534 | desired_pos ); | |
535 | ||
536 | /* Correct desired_pos to reflect beginning of packet. */ | |
537 | desired_pos += (hdrpos - buf); | |
538 | ||
539 | /* Seek to the beginning of the header. */ | |
540 | if ( fseek( sf_readfile, desired_pos, 0 ) < 0 ) | |
541 | error( "fseek() failed in sf_find_packet()" ); | |
542 | ||
543 | if ( sf_timestamp_less_than( &hdr.ts, desired_time ) ) | |
544 | { /* too early in the file */ | |
545 | *min_time = hdr.ts; | |
546 | min_pos = desired_pos; | |
547 | } | |
548 | ||
549 | else if ( sf_timestamp_less_than( desired_time, &hdr.ts ) ) | |
550 | { /* too late in the file */ | |
551 | *max_time = hdr.ts; | |
552 | max_pos = desired_pos; | |
553 | } | |
554 | ||
555 | else | |
556 | /* got it! */ | |
557 | break; | |
558 | } | |
559 | ||
560 | free( (char *) buf ); | |
561 | ||
562 | return status; | |
563 | } |