1
use crate::{single, utf8_seq};
2
use lazy_static::lazy_static;
3
use nom::branch::alt;
4
use nom::bytes::complete::{take_while, take_while1, take_while_m_n};
5
use nom::bytes::streaming::tag_no_case;
6
use nom::character::streaming::{char, crlf};
7
use nom::combinator::{cut, recognize};
8
use nom::error::{ErrorKind, FromExternalError, ParseError};
9
use nom::multi::{fold_many0, many0, separated_list1};
10
use nom::sequence::separated_pair;
11
use nom::{AsChar, IResult, Parser};
12
use nom_language::error::{VerboseError, VerboseErrorKind};
13
use std::str::FromStr;
14
use std::sync::Mutex;
15

            
16
mod component;
17
mod first_pass;
18
mod language_tag;
19
mod object;
20
mod param;
21
mod property;
22

            
23
/// Types produced by the parser.
24
///
25
/// These types represent the structure of the iCalendar format.
26
pub mod types;
27

            
28
use crate::parser::types::{ContentLine, ParamValue};
29
pub use first_pass::content_line_first_pass;
30
pub use object::{ical_object, ical_stream};
31
pub use param::value::*;
32
pub use param::{property_param, property_params};
33
pub use property::component::*;
34
pub use property::recur::prop_value_recur;
35
pub use property::uri::param_value_uri;
36
pub use property::value::*;
37

            
38
#[derive(Clone, Debug, PartialEq)]
39
pub struct Error<'a> {
40
    pub input: &'a [u8],
41
    pub error: InnerError,
42
}
43

            
44
#[derive(Clone, Debug, PartialEq)]
45
pub enum InnerError {
46
    Nom(ErrorKind),
47
    XNameTooShort,
48
    EncodingError(String, std::str::Utf8Error),
49
    InvalidDateNum,
50
    InvalidTimeNum,
51
    InvalidDurationNum,
52
    InvalidFloatNum,
53
    InvalidIntegerNum,
54
    InvalidRecurNum,
55
    InvalidRecurPart(String),
56
    InvalidOctet,
57
    InvalidIpv6,
58
    InvalidPort,
59
    MismatchedComponentEnd(Vec<u8>, Vec<u8>),
60
    UnknownParamName(String),
61
    InvalidValueParam,
62
    InvalidBinaryValueSpec,
63
}
64

            
65
impl<'a> Error<'a> {
66
    pub fn new(input: &'a [u8], error: InnerError) -> Error<'a> {
67
        Error { input, error }
68
    }
69
}
70

            
71
impl<'a> ParseError<&'a [u8]> for Error<'a> {
72
123398
    fn from_error_kind(input: &'a [u8], kind: ErrorKind) -> Self {
73
123398
        Error {
74
123398
            input,
75
123398
            error: InnerError::Nom(kind),
76
123398
        }
77
123398
    }
78

            
79
12244
    fn append(input: &'a [u8], kind: ErrorKind, _other: Self) -> Self {
80
12244
        Error {
81
12244
            input,
82
12244
            error: InnerError::Nom(kind),
83
12244
        }
84
12244
    }
85
}
86

            
87
// Enables use of `map_res` with nom::Err for the custom Error type.
88
impl<'a> FromExternalError<&'a [u8], nom::Err<Error<'a>>> for Error<'a> {
89
    fn from_external_error(input: &'a [u8], kind: ErrorKind, e: nom::Err<Error<'a>>) -> Self {
90
        match e {
91
            nom::Err::Error(e) | nom::Err::Failure(e) => Error {
92
                input: e.input,
93
                error: e.error,
94
            },
95
            nom::Err::Incomplete(_) => Error {
96
                input,
97
                error: InnerError::Nom(kind),
98
            },
99
        }
100
    }
101
}
102

            
103
impl<'a> From<(&'a [u8], ErrorKind)> for Error<'a> {
104
    fn from((input, kind): (&'a [u8], ErrorKind)) -> Self {
105
        Error {
106
            input,
107
            error: InnerError::Nom(kind),
108
        }
109
    }
110
}
111

            
112
// Registry of `(address, length)` pairs for the context strings leaked when an
// `Error` is converted into a `VerboseError`. Kept so that `clear_errors` can
// reclaim that memory in tests.
lazy_static! {
    static ref ERROR_HOLD: Mutex<Vec<(usize, usize)>> = Mutex::new(Vec::new());
}
115

            
116
/// Frees every context string recorded in `ERROR_HOLD` and empties the registry.
///
/// # Safety
///
/// The recorded strings are handed out as `&'static str` inside
/// `VerboseErrorKind::Context`. The caller must ensure that no `VerboseError`
/// created through the `From<Error>` conversion is read after this call,
/// otherwise those references dangle.
///
/// # Panics
///
/// Panics if the `ERROR_HOLD` mutex is poisoned.
#[cfg(test)]
pub(crate) unsafe fn clear_errors() {
    let mut held = ERROR_HOLD.lock().unwrap();
    for (addr, len) in held.drain(..) {
        // SAFETY: each entry was recorded from a `Box<str>` that was leaked
        // with `Box::leak`, so `addr` points at a live allocation of exactly
        // `len` bytes of valid UTF-8. Draining guarantees each entry is
        // reclaimed once.
        drop(unsafe { String::from_raw_parts(addr as *mut u8, len, len) });
    }
}
122

            
123
impl<'a> From<Error<'a>> for VerboseError<&'a [u8]> {
124
    fn from(value: Error<'a>) -> Self {
125
        let ctx = Box::leak(format!("{:?}", value.error).to_string().into_boxed_str());
126

            
127
        ERROR_HOLD
128
            .lock()
129
            .unwrap()
130
            .push((ctx.as_ptr() as usize, ctx.len()));
131

            
132
        VerboseError {
133
            errors: vec![(value.input, VerboseErrorKind::Context(ctx))],
134
        }
135
    }
136
}
137

            
138
/// All ASCII control characters except tab (%x09).
///
/// Returns `true` for %x00-08, %x0A-1F and %x7F; every other byte, including
/// horizontal tab and all non-ASCII bytes, yields `false`.
#[inline]
const fn is_control(b: u8) -> bool {
    // `is_ascii_control` covers %x00-1F and %x7F; tab is carved out explicitly.
    b != b'\t' && b.is_ascii_control()
}
143

            
144
674
fn param_text<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
145
674
where
146
674
    E: ParseError<&'a [u8]> + From<Error<'a>>,
147
674
{
148
3628
    take_while(|c| c != b'\"' && c != b';' && c != b':' && c != b',' && !is_control(c))(input)
149
674
}
150

            
151
634
fn quoted_string<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
152
634
where
153
634
    E: ParseError<&'a [u8]> + From<Error<'a>>,
154
634
{
155
634
    let (input, (_, content, _)) = (char('"'), cut(safe_char), char('"')).parse(input)?;
156

            
157
12
    Ok((input, content))
158
634
}
159

            
160
634
fn param_value<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
161
634
where
162
634
    E: ParseError<&'a [u8]> + From<Error<'a>>,
163
634
{
164
634
    let (input, value) = alt((quoted_string, param_text)).parse(input)?;
165

            
166
634
    Ok((input, value))
167
634
}
168

            
169
12
fn safe_char<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
170
12
where
171
12
    E: ParseError<&'a [u8]> + From<Error<'a>>,
172
12
{
173
258
    take_while(|c| c != b'\"' && !is_control(c))(input)
174
12
}
175

            
176
2710
fn iana_token<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
177
2710
where
178
2710
    E: ParseError<&'a [u8]> + From<Error<'a>>,
179
2710
{
180
34340
    take_while1(|c: u8| c.is_alphanum() || c == b'-')(input)
181
2710
}
182

            
183
966
fn x_name<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
184
966
where
185
966
    E: ParseError<&'a [u8]> + From<Error<'a>>,
186
966
{
187
966
    let (input, x_name) = recognize((
188
966
        tag_no_case("X-"),
189
1672
        cut(take_while1(|c: u8| c.is_alphanum() || c == b'-')),
190
966
    ))
191
966
    .parse(input)?;
192

            
193
228
    Ok((input, x_name))
194
966
}
195

            
196
100
fn name<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
197
100
where
198
100
    E: ParseError<&'a [u8]> + From<Error<'a>>,
199
100
{
200
100
    alt((iana_token, x_name)).parse(input)
201
100
}
202

            
203
608
fn param_name<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
204
608
where
205
608
    E: ParseError<&'a [u8]> + From<Error<'a>>,
206
608
{
207
608
    alt((iana_token, x_name)).parse(input)
208
608
}
209

            
210
/// Returns `true` for bytes allowed in a registered name: ASCII letters and
/// digits plus `!`, `#`, `$`, `&`, `.`, `+`, `-`, `^` and `_`.
#[inline]
const fn is_reg_name_char(b: u8) -> bool {
    b.is_ascii_alphanumeric()
        || matches!(
            b,
            b'!' | b'#' | b'$' | b'&' | b'.' | b'+' | b'-' | b'^' | b'_'
        )
}
214

            
215
// See https://www.rfc-editor.org/rfc/rfc4288 section 4.2
216
36
fn reg_name<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
217
36
where
218
36
    E: ParseError<&'a [u8]> + From<Error<'a>>,
219
36
{
220
36
    take_while_m_n(1, 127, is_reg_name_char)(input)
221
36
}
222

            
223
238
fn read_string<'a, E>(input: &'a [u8], context: &str) -> Result<String, nom::Err<E>>
224
238
where
225
238
    E: ParseError<&'a [u8]>,
226
238
    E: From<Error<'a>>,
227
238
{
228
238
    Ok(std::str::from_utf8(input)
229
238
        .map_err(|e| {
230
            nom::Err::Failure(
231
                Error::new(input, InnerError::EncodingError(context.to_string(), e)).into(),
232
            )
233
238
        })?
234
238
        .to_string())
235
238
}
236

            
237
150
fn read_int<'a, E, N>(input: &'a [u8]) -> Result<N, nom::Err<E>>
238
150
where
239
150
    E: ParseError<&'a [u8]>,
240
150
    E: From<Error<'a>>,
241
150
    N: FromStr,
242
150
{
243
150
    std::str::from_utf8(input)
244
150
        .map_err(|e| {
245
            nom::Err::Error(
246
                Error::new(
247
                    input,
248
                    InnerError::EncodingError("Invalid integer number text".to_string(), e),
249
                )
250
                .into(),
251
            )
252
150
        })?
253
150
        .parse()
254
150
        .map_err(|_| nom::Err::Error(Error::new(input, InnerError::InvalidIntegerNum).into()))
255
150
}
256

            
257
100
fn line_value<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Vec<u8>, E>
258
100
where
259
100
    E: ParseError<&'a [u8]> + From<Error<'a>>,
260
100
{
261
822
    let (input, v) = fold_many0(value_char, Vec::new, |mut acc, item| {
262
822
        acc.extend_from_slice(&item);
263
822
        acc
264
822
    })
265
100
    .parse(input)?;
266

            
267
100
    Ok((input, v))
268
100
}
269

            
270
2520
fn value_char<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Vec<u8>, E>
271
2520
where
272
2520
    E: ParseError<&'a [u8]> + From<Error<'a>>,
273
2520
{
274
2520
    alt((
275
2520
        single(|b| matches!(b, b' ' | b'\t' | b'\x21'..=b'\x7E')).map(|c| vec![c]),
276
2520
        utf8_seq.map(|c| c.to_vec()),
277
2520
    ))
278
2520
    .parse(input)
279
2520
}
280

            
281
164
fn value<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Vec<u8>, E>
282
164
where
283
164
    E: ParseError<&'a [u8]> + From<Error<'a>>,
284
164
{
285
1434
    fold_many0(value_char, Vec::new, |mut acc, item| {
286
1434
        acc.extend_from_slice(&item);
287
1434
        acc
288
1434
    })
289
164
    .parse(input)
290
164
}
291

            
292
16
fn param<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], ParamValue<'a>, E>
293
16
where
294
16
    E: ParseError<&'a [u8]> + From<Error<'a>>,
295
16
{
296
16
    let (input, (name, values)) = separated_pair(
297
16
        param_name,
298
16
        char('='),
299
16
        cut(separated_list1(char(','), param_value)),
300
16
    )
301
16
    .parse(input)?;
302

            
303
    Ok((
304
16
        input,
305
16
        if values.len() == 1 {
306
16
            ParamValue::Other {
307
16
                name,
308
16
                value: values[0],
309
16
            }
310
        } else {
311
            ParamValue::Others { name, values }
312
        },
313
    ))
314
16
}
315

            
316
100
fn content_line<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], ContentLine<'a>, E>
317
100
where
318
100
    E: ParseError<&'a [u8]> + From<Error<'a>>,
319
100
{
320
100
    let (input, (property_name, params, _, value, _)) = (
321
100
        name,
322
104
        many0((char(';'), cut(param))).map(|v| v.into_iter().map(|(_, p)| p).collect()),
323
100
        char(':'),
324
100
        cut(line_value),
325
100
        crlf,
326
100
    )
327
100
        .parse(input)?;
328

            
329
100
    Ok((
330
100
        input,
331
100
        ContentLine {
332
100
            property_name,
333
100
            params,
334
100
            value,
335
100
        },
336
100
    ))
337
100
}
338

            
339
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::check_rem;

    /// A plain IANA token is consumed completely.
    #[test]
    fn iana_token_desc() {
        let (rest, parsed) = iana_token::<Error>(b"DESCRIPTION").unwrap();
        check_rem(rest, 0);
        assert_eq!(b"DESCRIPTION", parsed);
    }

    /// An `X-` name stops at the first byte that is not part of the name.
    #[test]
    fn simple_x_name() {
        let (rest, parsed) = x_name::<Error>(b"X-TEST ").unwrap();
        check_rem(rest, 1);
        assert_eq!(b"X-TEST", parsed);
    }

    /// Hyphens inside the body (vendor prefix) belong to the name.
    #[test]
    fn simple_x_name_with_vendor() {
        let (rest, parsed) = x_name::<Error>(b"X-ESL-TEST ").unwrap();
        check_rem(rest, 1);
        assert_eq!(b"X-ESL-TEST", parsed);
    }

    /// A line without parameters yields the name and value and leaves the
    /// bytes after CRLF unconsumed.
    #[test]
    fn simple_content_line() {
        let source: &[u8] =
            b"DESCRIPTION:This is a long description that exists on a long line.\r\nnext";
        let (rest, line) = content_line::<Error>(source).unwrap();
        check_rem(rest, 4);
        assert_eq!(b"DESCRIPTION", line.property_name);
        assert_eq!(
            b"This is a long description that exists on a long line.",
            line.value.as_slice()
        );
    }

    /// Multi-byte UTF-8 in the value is preserved byte for byte.
    #[test]
    fn simple_content_line_utf8() {
        let source = "DESCRIPTION:This is a long description of a happy face - 😁.\r\n;".as_bytes();
        let (rest, line) = content_line::<Error>(source).unwrap();
        check_rem(rest, 1);
        assert_eq!(b"DESCRIPTION", line.property_name);
        assert_eq!(
            "This is a long description of a happy face - 😁.".as_bytes(),
            line.value.as_slice()
        );
    }
}