// Copyright 2017, CZ.NIC z.s.p.o. (http://www.nic.cz/)
//
// This file is part of the pakon system.
//
// Pakon is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
// Pakon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Pakon.  If not, see <http://www.gnu.org/licenses/>.

//! Data structures describing a query to the data contained in the keeper.
//!
//! The module contains definition of queries and ability to deserialize them from JSON. Note that
//! more „columns“ may appear in future, as pakon-guts supports more information.

use libdata::column::{Ident, Headers};
use libdata::stats::Stats;
use libdata::time::{Time, Timeline};

/// The whole query.
///
/// This represents a query for some data. Note that any option can be omitted in the JSON
/// representation and a sensible default will be used.
///
/// If a flow fits into the time interval only partially, only the relevant part (with some
/// rounding to whole time slices) is included.
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct Query {
    /// Include only data from this time on.
    ///
    /// If not present, data from the beginning of the time itself (or at least from the oldest
    /// data present) is included.
    ///
    /// It is represented as number of milliseconds since 1.1.1970. Note that the resolution of
    /// stored data is usually not this good, but the milliseconds are used for consistency with
    /// the rest of the system.
    #[serde(default)]
    pub start: Option<Time>,
    /// Include only data up to this time.
    ///
    /// If not present, data up to now are used.
    ///
    /// It is represented as number of milliseconds since 1.1.1970. Note that the resolution of
    /// stored data is usually not this good, but the milliseconds are used for consistency with
    /// the rest of the system.
    #[serde(default)]
    pub end: Option<Time>,
    /// Filter data according to the given criteria.
    ///
    /// Each supplied filter must be satisfied. Note that the filters allow specifying multiple
    /// alternative values each.
    ///
    /// If not present, no filters apply and all the data (in the given time interval) are
    /// included.
    #[serde(default)]
    pub filter: Headers,
    /// Aggregate according to these „columns“.
    ///
    /// The columns that have the same values form the results. The other columns are summed up
    /// together. Note that some reasonable way of summing is used for each column (eg. the set of
    /// names are unified, while amount of sent data is added up).
    ///
    /// If no aggregation is present, every flow would form a separate result. Note however, that
    /// the system doesn't keep such detailed information for the whole history and may refuse to
    /// answer if too historical data is requested without any aggregation.
    #[serde(default)]
    pub aggregate: Vec<Ident>,
    /// In addition to „aggregate“, include these columns in each result.
    #[serde(default)]
    pub columns: Vec<Ident>,
    /// Produce further details about the results.
    ///
    /// If this is false (the default), then each result is simply the sum over the whole time. If
    /// set to true, the time is split into non-overlapping intervals and each result is sequence
    /// of these intervals. Some of the columns are at the „head“ of the result and are the same
    /// over the whole time (eg. the requested domain name), while others are present in each
    /// interval (amount of transferred data or average speed). The length of the intervals depend
    /// on how far into the past the request goes (records close to now are finer grained).
    #[serde(default)]
    pub details: bool,
}

/// Information about a group of flows.
///
/// The bucket contains a group of flows of certain criteria. The structure describes these flows.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)]
pub struct ResponseBucket {
    /// Information about the flows.
    ///
    /// The headers contain the discrete information, like IP addresses or ports. If there are
    /// multiple flows, it is possible to have multiple values of a single criterion. In that case,
    /// all are returned inside a set.
    pub headers: Headers,
    /// The statistics about the flows.
    ///
    /// The „accumulative“ statistics of the flows ‒ their sizes, speeds, etc.
    ///
    /// Each value in this vector corresponds to one interval in the timeline inside
    /// [`Response`](struct.Response.html).
    pub stats: Vec<Stats>,
}

impl ResponseBucket {
    /// Flattens all the statistics into a single one.
    ///
    /// Combines all the statistics in `stats` sequentially to form a single all-encompassing
    /// statistic.
    pub fn flatten_stats(&mut self) {
        let mut result = Stats::default();
        for stat in &self.stats {
            result.combine_seq(stat);
        }
        self.stats = vec![result];
    }
}

/// The whole query response.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)]
pub struct Response {
    /// The timeline.
    ///
    /// The timeline describes into what intervals the continuum is split. This corresponds to the
    /// `stats` inside the buckets.
    ///
    /// If the timeline is just a single interval encompassing the whole continuum, it is omitted.
    #[serde(skip_serializing_if = "Timeline::is_unsplit")]
    pub timeline: Timeline,
    /// The buckets holding the flows.
    ///
    /// Each bucket maps flows of some criteria across the whole time of the query.
    pub buckets: Vec<ResponseBucket>,
}

impl Response {
    /// Checks internal invariants.
    ///
    /// # Panics
    ///
    /// If some invariant is broken, this function panics. This is a helper function for unit
    /// tests.
    pub fn assert_sane(&self) {
        for buck in &self.buckets {
            assert_eq!(self.timeline.interval_count(),
                       buck.stats.len(),
                       "The bucket {:?} doesn't match the timeline {:?}",
                       buck,
                       self.timeline);
        }
    }
}

#[cfg(test)]
mod tests {
    use std::net::IpAddr;
    use std::time::{Duration, UNIX_EPOCH};

    use eui48::MacAddress;
    use serde_json;

    use super::*;
    use libdata::column::{self, Local, Remote};
    use libdata::flow::{self, Bytes, Count, Direction, Speed};
    use libdata::stats::Stat;
    use test_help;

    /// Makes sure all types needed in the tests are registered.
    fn register_types() {
        column::register::<Direction>();
        column::register_endpoint::<IpAddr>();
        column::register_endpoint::<MacAddress>();
    }

    /// Tests deserialization of the query.
    #[test]
    fn query_deser() {
        register_types();
        // Note that all the values in Query are optional. Such a query should probably be refused
        // due to being to general and dumping all the data, but that's for some further layers.
        test_help::deser("{}", &Query {
            start: None,
            end: None,
            filter: Headers::new(),
            aggregate: vec![],
            columns: vec![],
            details: false,
        });
        let mut filter = Headers::new();
        filter.insert(Direction::In);
        test_help::deser(
            r#"{
                "start": 10,
                "end": -20,
                "filter": {
                    "direction": ["IN"]
                },
                "aggregate": [
                    "remote-mac"
                ],
                "columns": [
                    "local-ip"
                ],
                "details": true
            }"#,
            &Query {
                start: Some(Time::Absolute(flow::system_time_from_ms(10))),
                end: Some(Time::Relative(Duration::from_millis(20))),
                filter,
                aggregate: vec![Ident::of::<Remote<MacAddress>>()],
                columns: vec![Ident::of::<Local<IpAddr>>()],
                details: true,
            });
        // A typo is detected
        serde_json::from_str::<Query>(r#"{"detals": true}"#).unwrap_err();
    }

    /// Tests serialization of the response.
    ///
    /// Note that we check it against the `Value` type, not a string. This allows us to ignore
    /// certain unimportant parts of the JSON (whitespace, order inside maps) and we can use a more
    /// readable representation here.
    #[test]
    fn response_ser() {
        register_types();
        // A response with nothing in it.
        let empty_response = Response::default();
        let empty_expected = json!({
            "buckets": []
        });
        assert_eq!(empty_expected, serde_json::to_value(&empty_response).unwrap());

        // A response with a non-empty timeline
        let borders = [3, 7, 92]
            .iter()
            .map(|ms| flow::system_time_from_ms(*ms))
            .collect();
        let with_timeline_response = Response {
            buckets: Vec::new(),
            timeline: Timeline::from_borders(borders),
        };
        let with_timeline_expected = json!({
            "buckets": [],
            "timeline": [
                { "end": 3 },
                { "start": 3, "end": 7 },
                { "start": 7, "end": 92 },
                { "start": 92 }
            ]
        });
        assert_eq!(with_timeline_expected, serde_json::to_value(&with_timeline_response).unwrap());

        // Some buckets are present
        let bucket_empty = ResponseBucket::default();
        let mut headers = Headers::new();
        headers.insert(Local("192.0.2.1".parse::<IpAddr>().unwrap()));
        let bucket_full = ResponseBucket {
            headers: headers,
            stats: vec![
                Stats::default(),
                Stats {
                    dir_in: Stat {
                        packets: Count(1),
                        size: Bytes(120),
                        max_speed: Speed(20),
                        speed_duration: Duration::from_secs(60),
                        flows: Count(4),
                        flows_started: Count(2),
                        start: None,
                        end: None,
                    },
                    dir_out: Stat {
                        packets: Count(2),
                        size: Bytes(240),
                        max_speed: Speed(40),
                        speed_duration: Duration::from_secs(60),
                        flows: Count(4),
                        flows_started: Count(2),
                        start: Some(UNIX_EPOCH),
                        end: Some(UNIX_EPOCH + Duration::from_secs(60)),
                    }
                }
            ]
        };
        let bucket_response = Response {
            buckets: vec![bucket_empty, bucket_full],
            timeline: Timeline::from_borders(vec![flow::system_time_from_ms(2)]),
        };
        let bucket_expected = json!({
            "buckets": [
                {
                    "headers": {},
                    "stats": []
                },
                {
                    "headers": {
                        "local-ip": ["192.0.2.1"]
                    },
                    "stats": [
                        {},
                        {
                            "in": {
                                "packets": 1,
                                "size": 120,
                                "avg-speed": 2,
                                "max-speed": 20,
                                "flows": 4,
                                "start": null,
                                "end": null,
                            },
                            "out": {
                                "packets": 2,
                                "size": 240,
                                "avg-speed": 4,
                                "max-speed": 40,
                                "flows": 4,
                                "start": 0,
                                "end": 60000,
                            }
                        }
                    ]
                }
            ],
            "timeline": [
                { "end": 2 },
                { "start": 2 }
            ]
        });
        assert_eq!(bucket_expected, serde_json::to_value(&bucket_response).unwrap());
    }

    /// Tests flattening of response bucket's statistics
    #[test]
    fn bucket_flatten() {
        let s1 = Stats {
            dir_in: Stat {
                packets: Count(2),
                size: Bytes(1300),
                max_speed: Speed(1300),
                speed_duration: Duration::from_secs(10),
                flows: Count(2),
                flows_started: Count(1),
                start: Some(UNIX_EPOCH),
                end: Some(UNIX_EPOCH + Duration::from_secs(10)),
            },
            dir_out: Stat {
                packets: Count(1),
                size: Bytes(100),
                max_speed: Speed(50),
                speed_duration: Duration::from_secs(6),
                flows: Count(4),
                flows_started: Count(0),
                start: Some(UNIX_EPOCH + Duration::from_secs(4)),
                end: Some(UNIX_EPOCH + Duration::from_secs(10)),
            },
        };
        let s2 = Stats {
            dir_in: Stat {
                packets: Count(3),
                size: Bytes(1500),
                max_speed: Speed(500),
                speed_duration: Duration::from_secs(5),
                flows: Count(3),
                flows_started: Count(2),
                start: Some(UNIX_EPOCH + Duration::from_secs(5)),
                end: Some(UNIX_EPOCH + Duration::from_secs(10)),
            },
            dir_out: Stat {
                packets: Count(2),
                size: Bytes(400),
                max_speed: Speed(150),
                speed_duration: Duration::from_secs(6),
                flows: Count(4),
                flows_started: Count(2),
                start: Some(UNIX_EPOCH + Duration::from_secs(5)),
                end: Some(UNIX_EPOCH + Duration::from_secs(11)),
            },
        };
        let exp_seq = Stats {
            dir_in: Stat {
                packets: Count(5),
                size: Bytes(2800),
                max_speed: Speed(1300),
                speed_duration: Duration::from_secs(15),
                flows: Count(4),
                flows_started: Count(3),
                start: Some(UNIX_EPOCH),
                end: Some(UNIX_EPOCH + Duration::from_secs(10)),
            },
            dir_out: Stat {
                packets: Count(3),
                size: Bytes(500),
                max_speed: Speed(150),
                speed_duration: Duration::from_secs(12),
                flows: Count(6),
                flows_started: Count(2),
                start: Some(UNIX_EPOCH + Duration::from_secs(4)),
                end: Some(UNIX_EPOCH + Duration::from_secs(11)),
            },
        };
        let mut bucket = ResponseBucket {
            headers: Headers::new(),
            stats: vec![
                Stats::default(),
                s1.clone(),
                s2.clone(),
                Stats::default(),
            ],
        };
        bucket.flatten_stats();
        let expected = ResponseBucket {
            headers: Headers::new(),
            stats: vec![exp_seq.clone()],
        };
        assert_eq!(expected, bucket);
    }
}