1
use crate::parser::types::{Authority, Host, IpAddr, Uri};
2
use crate::parser::{Error, InnerError};
3
use crate::single;
4
use nom::branch::alt;
5
use nom::bytes::streaming::{tag, take_while, take_while1, take_while_m_n};
6
use nom::character::streaming::char;
7
use nom::character::{is_alphabetic, is_digit};
8
use nom::combinator::{map_res, opt, recognize, verify};
9
use nom::error::ParseError;
10
use nom::multi::{fold_many0, fold_many1, many0, many1, separated_list0};
11
use nom::sequence::tuple;
12
use nom::{IResult, Parser};
13
use std::fmt::{Display, Formatter, Write};
14
use std::net::{Ipv4Addr, Ipv6Addr};
15

            
16
// TODO can be a property or a param value, rename
17
542
pub fn param_value_uri<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Uri<'a>, E>
18
542
where
19
542
    E: ParseError<&'a [u8]>
20
542
        + nom::error::FromExternalError<&'a [u8], nom::Err<E>>
21
542
        + From<Error<'a>>,
22
542
{
23
542
    let (input, (scheme, _, (authority, path), query, fragment)) = tuple((
24
542
        scheme,
25
542
        char(':'),
26
542
        alt((
27
542
            tuple((tag("//"), authority, opt(path_absolute_empty))).map(|(_, a, b)| (Some(a), b)),
28
542
            path_absolute.map(|p| (None, Some(p))),
29
542
            path_rootless.map(|p| (None, Some(p))),
30
542
        )),
31
542
        opt(tuple((char('?'), query_or_fragment)).map(|(_, v)| v)),
32
542
        opt(tuple((char('#'), query_or_fragment)).map(|(_, v)| v)),
33
542
    ))(input)?;
34

            
35
538
    Ok((
36
538
        input,
37
538
        Uri {
38
538
            scheme,
39
538
            authority,
40
538
            path: path.unwrap_or_default().to_vec(),
41
538
            query,
42
538
            fragment,
43
538
        },
44
538
    ))
45
542
}
46

            
47
/// Returns `true` for bytes allowed inside a URI scheme: ASCII letters, ASCII digits,
/// `+`, `-` and `.`.
#[inline]
const fn is_scheme_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.')
}
51

            
52
542
fn scheme<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
53
542
where
54
542
    E: ParseError<&'a [u8]> + From<Error<'a>>,
55
542
{
56
542
    verify(take_while1(is_scheme_char), |sch: &[u8]| {
57
542
        is_alphabetic(sch[0])
58
542
    })(input)
59
542
}
60

            
61
/// Returns `true` for `0`-`9` and the upper-case hexadecimal letters `A`-`F`.
#[inline]
const fn is_hex_digit_upper(b: u8) -> bool {
    b.is_ascii_digit() || matches!(b, b'A'..=b'F')
}
65

            
66
/// Returns `true` for any ASCII hexadecimal digit, in either letter case.
#[inline]
const fn is_hex_digit(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')
}
70

            
71
/// Returns `true` for the "unreserved" URI bytes: ASCII letters, ASCII digits, `-`, `.`,
/// `_` and `~`.
#[inline]
const fn is_unreserved(b: u8) -> bool {
    b.is_ascii_alphanumeric() || matches!(b, b'-' | b'.' | b'_' | b'~')
}
75

            
76
1372
fn pct_encoded<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Vec<u8>, E>
77
1372
where
78
1372
    E: ParseError<&'a [u8]> + From<Error<'a>>,
79
1372
{
80
1372
    tuple((
81
1372
        char('%'),
82
1372
        take_while_m_n(2, 2, is_hex_digit_upper).map(|v| {
83
12
            // TODO do without a dep here?
84
12
            hex::decode(v).unwrap()
85
1372
        }),
86
1372
    ))
87
1372
    .map(|(_, v)| v)
88
1372
    .parse(input)
89
1372
}
90

            
91
/// Returns `true` for the "sub-delims" URI bytes: `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`,
/// `,`, `;` and `=`.
#[inline]
const fn is_sub_delim(b: u8) -> bool {
    const SUB_DELIMS: &[u8] = b"!$&'()*+,;=";

    // Manual scan: slice `contains` is not callable from a `const fn`.
    let mut idx = 0;
    while idx < SUB_DELIMS.len() {
        if SUB_DELIMS[idx] == b {
            return true;
        }
        idx += 1;
    }
    false
}
98

            
99
266
fn authority<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Authority, E>
100
266
where
101
266
    E: ParseError<&'a [u8]>
102
266
        + nom::error::FromExternalError<&'a [u8], nom::Err<E>>
103
266
        + From<Error<'a>>,
104
266
{
105
266
    tuple((
106
266
        opt(tuple((user_info, char('@'))).map(|(u, _)| u)),
107
266
        host,
108
266
        opt(tuple((char(':'), port)).map(|(_, p)| p)),
109
266
    ))
110
266
    .map(|(user_info, host, port)| Authority {
111
266
        user_info,
112
266
        host,
113
266
        port,
114
266
    })
115
266
    .parse(input)
116
266
}
117

            
118
10
fn port<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], u16, E>
119
10
where
120
10
    E: ParseError<&'a [u8]>
121
10
        + nom::error::FromExternalError<&'a [u8], nom::Err<E>>
122
10
        + From<Error<'a>>,
123
10
{
124
10
    map_res(take_while(is_digit), |c| {
125
10
        std::str::from_utf8(c)
126
10
            .map_err(|e| {
127
                nom::Err::Error(
128
                    Error::new(
129
                        input,
130
                        InnerError::EncodingError("Recur month list".to_string(), e),
131
                    )
132
                    .into(),
133
                )
134
10
            })?
135
10
            .parse::<u16>()
136
10
            .map_err(|_| nom::Err::Error(Error::new(input, InnerError::InvalidPort).into()))
137
10
    })
138
10
    .parse(input)
139
10
}
140

            
141
266
fn user_info<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Vec<u8>, E>
142
266
where
143
266
    E: ParseError<&'a [u8]> + From<Error<'a>>,
144
266
{
145
266
    fold_many1(
146
266
        alt((
147
2982
            single(is_unreserved).map(|c| vec![c]),
148
266
            pct_encoded,
149
266
            single(is_sub_delim).map(|c| vec![c]),
150
266
            tag(":").map(|c: &[u8]| c.to_vec()),
151
266
        )),
152
266
        Vec::new,
153
2992
        |mut acc, item| {
154
2992
            acc.extend(item);
155
2992
            acc
156
2992
        },
157
266
    )(input)
158
266
}
159

            
160
266
fn host<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Host, E>
161
266
where
162
266
    E: ParseError<&'a [u8]>
163
266
        + nom::error::FromExternalError<&'a [u8], nom::Err<E>>
164
266
        + From<Error<'a>>,
165
266
{
166
266
    alt((
167
266
        ip_literal.map(Host::IpAddr),
168
266
        ip_v4_addr
169
266
            .map(|ip| IpAddr::V4(Ipv4Addr::from(ip)))
170
266
            .map(Host::IpAddr),
171
266
        reg_name.map(Host::RegName),
172
266
    ))(input)
173
266
}
174

            
175
266
fn ip_literal<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], IpAddr, E>
176
266
where
177
266
    E: ParseError<&'a [u8]>
178
266
        + nom::error::FromExternalError<&'a [u8], nom::Err<E>>
179
266
        + From<Error<'a>>,
180
266
{
181
266
    tuple((
182
266
        tag("["),
183
266
        alt((
184
266
            ip_v6_addr.map(IpAddr::V6),
185
266
            ip_v_future_addr.map(|ip| IpAddr::VFuture(ip.to_vec())),
186
266
        )),
187
266
        tag("]"),
188
266
    ))
189
266
    .map(|(_, v, _)| v)
190
266
    .parse(input)
191
266
}
192

            
193
fn ip_v_future_addr<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &[u8], E>
194
where
195
    E: ParseError<&'a [u8]> + From<Error<'a>>,
196
{
197
    recognize(tuple((
198
        char('v').map(|a| a as u8),
199
        take_while1(is_hex_digit),
200
        char('.'),
201
        many1(alt((
202
            single(is_unreserved),
203
            single(is_sub_delim),
204
            char(':').map(|c| c as u8),
205
        ))),
206
    )))(input)
207
}
208

            
209
4
fn ip_v6_addr<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Ipv6Addr, E>
210
4
where
211
4
    E: ParseError<&'a [u8]>
212
4
        + nom::error::FromExternalError<&'a [u8], nom::Err<E>>
213
4
        + From<Error<'a>>,
214
4
{
215
4
    let (input, prefix_parts) = separated_list0(char(':'), h_16)(input)?;
216

            
217
4
    if prefix_parts.len() > 7 {
218
        return Err(nom::Err::Error(
219
            Error::new(input, InnerError::InvalidIpv6).into(),
220
        ));
221
4
    }
222

            
223
4
    let (input, found_collapse) = opt(tag("::"))(input)?;
224
4
    let fill_zeroes = found_collapse.is_some();
225

            
226
4
    let (input, suffix_parts) = separated_list0(char(':'), h_16)(input)?;
227

            
228
4
    if suffix_parts.len() > 8 {
229
        return Err(nom::Err::Error(
230
            Error::new(input, InnerError::InvalidIpv6).into(),
231
        ));
232
4
    }
233

            
234
4
    let (input, ipv4_post) = opt(tuple((char(':'), ip_v4_addr)))(input)?;
235

            
236
4
    let mut content = [0u8; 16];
237

            
238
4
    let provided_len =
239
4
        prefix_parts.len() * 2 + suffix_parts.len() * 2 + if ipv4_post.is_some() { 4 } else { 0 };
240

            
241
4
    if provided_len > 16 || (provided_len < 16 && !fill_zeroes) {
242
        return Err(nom::Err::Error(
243
            Error::new(input, InnerError::InvalidIpv6).into(),
244
        ));
245
4
    }
246
4

            
247
4
    let mut i = 0;
248
12
    for [a, b] in prefix_parts {
249
8
        content[i] = a;
250
8
        content[i + 1] = b;
251
8
        i += 2;
252
8
    }
253

            
254
4
    if fill_zeroes {
255
4
        let zeroes = 16 - provided_len;
256
4
        i += zeroes;
257
4
    }
258

            
259
8
    for [a, b] in suffix_parts {
260
4
        content[i] = a;
261
4
        content[i + 1] = b;
262
4
        i += 2;
263
4
    }
264

            
265
4
    if let Some((_, ipv4)) = ipv4_post {
266
        content[12] = ipv4[0];
267
        content[13] = ipv4[1];
268
        content[14] = ipv4[2];
269
        content[15] = ipv4[3];
270
4
    }
271

            
272
4
    Ok((input, Ipv6Addr::from(content)))
273
4
}
274

            
275
16
fn h_16<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], [u8; 2], E>
276
16
where
277
16
    E: ParseError<&'a [u8]> + From<Error<'a>>,
278
16
{
279
16
    take_while_m_n(1, 4, is_hex_digit)
280
16
        .map(|c: &[u8]| {
281
12
            let mut src = c.to_vec();
282
28
            while src.len() < 4 {
283
16
                src.insert(0, b'0');
284
16
            }
285
12
            let mut dst = [0, 0];
286
12
            hex::decode_to_slice(src, &mut dst).unwrap();
287
12
            dst
288
16
        })
289
16
        .parse(input)
290
16
}
291

            
292
264
fn ip_v4_addr<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], [u8; 4], E>
293
264
where
294
264
    E: ParseError<&'a [u8]>
295
264
        + nom::error::FromExternalError<&'a [u8], nom::Err<E>>
296
264
        + From<Error<'a>>,
297
264
{
298
264
    tuple((
299
264
        dec_octet,
300
264
        char('.'),
301
264
        dec_octet,
302
264
        char('.'),
303
264
        dec_octet,
304
264
        char('.'),
305
264
        dec_octet,
306
264
    ))
307
264
    .map(|(a, _, b, _, c, _, d)| [a, b, c, d])
308
264
    .parse(input)
309
264
}
310

            
311
270
fn dec_octet<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], u8, E>
312
270
where
313
270
    E: ParseError<&'a [u8]>
314
270
        + nom::error::FromExternalError<&'a [u8], nom::Err<E>>
315
270
        + From<Error<'a>>,
316
270
{
317
270
    map_res(
318
270
        verify(take_while_m_n(1, 3, is_digit), |b: &[u8]| {
319
8
            // May not have a 0 prefix
320
8
            if b.len() == 2 {
321
2
                b[0] != b'0'
322
6
            } else if b.len() == 3 {
323
2
                if b[0] == b'0' && b[1] == b'0' {
324
                    false
325
                } else {
326
2
                    b[0] != b'0'
327
                }
328
            } else {
329
4
                true
330
            }
331
270
        }),
332
270
        |b| {
333
8
            std::str::from_utf8(b)
334
8
                .unwrap()
335
8
                .parse::<u8>()
336
8
                .map_err(|_| nom::Err::Error(Error::new(input, InnerError::InvalidOctet).into()))
337
270
        },
338
270
    )(input)
339
270
}
340

            
341
262
fn reg_name<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Vec<u8>, E>
342
262
where
343
262
    E: ParseError<&'a [u8]> + From<Error<'a>>,
344
262
{
345
262
    fold_many0(
346
262
        alt((
347
2926
            single(is_unreserved).map(|c| vec![c]),
348
262
            pct_encoded,
349
262
            single(is_sub_delim).map(|c| vec![c]),
350
262
        )),
351
262
        Vec::new,
352
2926
        |mut acc, item| {
353
2926
            acc.extend(item);
354
2926
            acc
355
2926
        },
356
262
    )(input)
357
262
}
358

            
359
538
fn path_absolute_empty<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
360
538
where
361
538
    E: ParseError<&'a [u8]> + From<Error<'a>>,
362
538
{
363
538
    recognize(many0(tuple((char('/'), segment))))(input)
364
538
}
365

            
366
272
fn path_absolute<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
367
272
where
368
272
    E: ParseError<&'a [u8]> + From<Error<'a>>,
369
272
{
370
272
    recognize(tuple((segment_nz, path_absolute_empty)))(input)
371
272
}
372

            
373
fn path_rootless<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E>
374
where
375
    E: ParseError<&'a [u8]> + From<Error<'a>>,
376
{
377
    recognize(tuple((segment_nz, many0(tuple((char('/'), segment))))))(input)
378
}
379

            
380
304
fn segment<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Vec<u8>, E>
381
304
where
382
304
    E: ParseError<&'a [u8]> + From<Error<'a>>,
383
304
{
384
1644
    fold_many0(p_char, Vec::new, |mut acc, item| {
385
1644
        acc.extend(item);
386
1644
        acc
387
1644
    })(input)
388
304
}
389

            
390
272
fn segment_nz<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Vec<u8>, E>
391
272
where
392
272
    E: ParseError<&'a [u8]> + From<Error<'a>>,
393
272
{
394
4598
    fold_many1(p_char, Vec::new, |mut acc, item| {
395
4598
        acc.extend(item);
396
4598
        acc
397
4598
    })(input)
398
272
}
399

            
400
8
fn query_or_fragment<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &[u8], E>
401
8
where
402
8
    E: ParseError<&'a [u8]> + From<Error<'a>>,
403
8
{
404
8
    recognize(many0(alt((
405
8
        p_char,
406
8
        tag("/").map(|c: &[u8]| c.to_vec()),
407
14
        tag("?").map(|c: &[u8]| c.to_vec()),
408
8
    ))))(input)
409
8
}
410

            
411
6966
fn p_char<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Vec<u8>, E>
412
6966
where
413
6966
    E: ParseError<&'a [u8]> + From<Error<'a>>,
414
6966
{
415
6966
    alt((
416
6966
        single(is_unreserved).map(|c| vec![c]),
417
6966
        pct_encoded,
418
6966
        single(is_sub_delim).map(|c| vec![c]),
419
6966
        tag(":").map(|c: &[u8]| c.to_vec()),
420
6966
        tag("@").map(|c: &[u8]| c.to_vec()),
421
6966
    ))(input)
422
6966
}
423

            
424
impl Display for Uri<'_> {
425
60
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
426
60
        write!(f, "{}", String::from_utf8_lossy(self.scheme))?;
427
60
        f.write_char(':')?;
428

            
429
60
        if let Some(authority) = &self.authority {
430
52
            f.write_char('/')?;
431
52
            f.write_char('/')?;
432

            
433
52
            if let Some(user_info) = &authority.user_info {
434
                write!(f, "{}", String::from_utf8_lossy(user_info))?;
435
                f.write_char('@')?;
436
52
            }
437

            
438
4
            match &authority.host {
439
2
                Host::IpAddr(IpAddr::V4(ip)) => write!(f, "{}", ip)?,
440
2
                Host::IpAddr(IpAddr::V6(ip)) => {
441
2
                    f.write_char('[')?;
442
2
                    write!(f, "{}", ip)?;
443
2
                    f.write_char(']')?;
444
                }
445
                Host::IpAddr(IpAddr::VFuture(vf)) => {
446
                    f.write_char('[')?;
447
                    vf.iter().try_for_each(|b| write!(f, "{:02X}", b))?;
448
                    f.write_char(']')?;
449
                }
450
48
                Host::RegName(name) => write!(f, "{}", String::from_utf8_lossy(name))?,
451
            }
452

            
453
52
            if let Some(port) = authority.port {
454
2
                write!(f, ":{}", port)?;
455
50
            }
456
8
        };
457

            
458
60
        write!(f, "{}", String::from_utf8_lossy(&self.path))?;
459

            
460
60
        if let Some(query) = self.query {
461
2
            f.write_char('?')?;
462
2
            write!(f, "{}", String::from_utf8_lossy(query))?;
463
58
        }
464

            
465
60
        if let Some(fragment) = self.fragment {
466
            f.write_char('#')?;
467
            write!(f, "{}", String::from_utf8_lossy(fragment))?;
468
60
        }
469

            
470
60
        Ok(())
471
60
    }
472
}
473

            
474
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::types::{Host, IpAddr, Uri};
    use crate::test_utils::check_rem;

    // Every input ends with one sentinel byte ('`') that is not part of the URI. The
    // parsers are streaming parsers, so without a trailing byte they would report
    // `Incomplete` instead of finishing.

    /// Parses `raw` and checks that exactly the sentinel byte is left over.
    fn parse_with_sentinel(raw: &[u8]) -> Uri<'_> {
        let (rest, uri) = param_value_uri::<Error>(raw).unwrap();
        check_rem(rest, 1);
        uri
    }

    /// Checks that `uri` serializes back to `raw` minus its sentinel byte.
    fn check_serialize_raw(uri: Uri, raw: &[u8]) {
        let (_sentinel, expected) = raw.split_last().unwrap();
        let serialized = uri.to_string();
        assert_eq!(serialized.as_bytes(), expected);
    }

    #[test]
    fn ftp() {
        let raw = b"ftp://ftp.is.co.za/rfc/rfc1808.txt`";
        let uri = parse_with_sentinel(raw);

        assert_eq!(uri.scheme, b"ftp");
        assert_eq!(
            uri.authority.clone().unwrap().host,
            Host::RegName(b"ftp.is.co.za".to_vec())
        );
        assert_eq!(uri.path, b"/rfc/rfc1808.txt");
        check_serialize_raw(uri, raw);
    }

    #[test]
    fn http() {
        let raw = b"http://www.ietf.org/rfc/rfc2396.txt`";
        let uri = parse_with_sentinel(raw);

        assert_eq!(uri.scheme, b"http");
        assert_eq!(
            uri.authority.clone().unwrap().host,
            Host::RegName(b"www.ietf.org".to_vec())
        );
        assert_eq!(uri.path, b"/rfc/rfc2396.txt");
        check_serialize_raw(uri, raw);
    }

    #[test]
    fn ip_v6() {
        let (rest, address) = ip_v6_addr::<Error>(b"2001:db8::7`").unwrap();
        check_rem(rest, 1);
        assert_eq!(address, Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 7));
    }

    #[test]
    fn ldap() {
        let raw = b"ldap://[2001:db8::7]/c=GB?objectClass?one`";
        let uri = parse_with_sentinel(raw);

        assert_eq!(uri.scheme, b"ldap");
        assert_eq!(
            uri.authority.clone().unwrap().host,
            Host::IpAddr(IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 7)))
        );
        assert_eq!(uri.path, b"/c=GB");
        assert_eq!(uri.query.unwrap(), b"objectClass?one");
        check_serialize_raw(uri, raw);
    }

    #[test]
    fn mailto() {
        let raw = b"mailto:John.Doe@example.com`";
        let uri = parse_with_sentinel(raw);

        assert_eq!(uri.scheme, b"mailto");
        assert_eq!(uri.path, b"John.Doe@example.com".to_vec());
        check_serialize_raw(uri, raw);
    }

    #[test]
    fn news() {
        let raw = b"news:comp.infosystems.www.servers.unix`";
        let uri = parse_with_sentinel(raw);

        assert_eq!(uri.scheme, b"news");
        assert_eq!(uri.path, b"comp.infosystems.www.servers.unix".to_vec());
        check_serialize_raw(uri, raw);
    }

    #[test]
    fn tel() {
        let raw = b"tel:+1-816-555-1212`";
        let uri = parse_with_sentinel(raw);

        assert_eq!(uri.scheme, b"tel");
        assert_eq!(uri.path, b"+1-816-555-1212".to_vec());
        check_serialize_raw(uri, raw);
    }

    #[test]
    fn telnet() {
        let raw = b"telnet://192.0.2.16:80/`";
        let uri = parse_with_sentinel(raw);

        assert_eq!(uri.scheme, b"telnet");
        let authority = uri.authority.clone().unwrap();
        assert_eq!(
            authority.host,
            Host::IpAddr(IpAddr::V4(Ipv4Addr::new(192, 0, 2, 16)))
        );
        assert_eq!(authority.port.unwrap(), 80);
        check_serialize_raw(uri, raw);
    }

    #[test]
    fn urn() {
        let raw = b"urn:oasis:names:specification:docbook:dtd:xml:4.1.2`";
        let uri = parse_with_sentinel(raw);

        assert_eq!(uri.scheme, b"urn");
        assert_eq!(
            uri.path,
            b"oasis:names:specification:docbook:dtd:xml:4.1.2".to_vec()
        );
        check_serialize_raw(uri, raw);
    }
}