1use crate::compiler::function::EnumVariant;
2use crate::compiler::prelude::*;
3use std::{
4 borrow::Cow,
5 collections::BTreeMap,
6 fmt,
7 str::FromStr,
8 sync::{Arc, LazyLock},
9};
10use woothee::parser::Parser as WootheeParser;
11
12static UA_EXTRACTOR: LazyLock<ua_parser::Extractor> = LazyLock::new(|| {
13 let regexes = include!(concat!(env!("OUT_DIR"), "/user_agent_regexes.rs"));
14 ua_parser::Extractor::try_from(regexes).expect("Regex file is not valid.")
15});
16
17static DEFAULT_MODE: LazyLock<Value> = LazyLock::new(|| Value::Bytes(Bytes::from("fast")));
18
19static MODE_ENUM: &[EnumVariant] = &[
20 EnumVariant {
21 value: "fast",
22 description: "Fastest mode but most unreliable. Uses parser from project [Woothee](https://github.com/woothee/woothee).",
23 },
24 EnumVariant {
25 value: "reliable",
26 description: indoc! {"
27 Provides greater reliability than `fast` and retains it's speed in common cases.
28 Parses with [Woothee](https://github.com/woothee/woothee) parser and with parser from
29 [uap project](https://github.com/ua-parser/uap-core) if there are some missing fields
30 that the first parser wasn't able to parse out but the second one maybe can.
31 "},
32 },
33 EnumVariant {
34 value: "enriched",
35 description: indoc! {"
36 Parses with both parser from [Woothee](https://github.com/woothee/woothee) and parser from
37 [uap project](https://github.com/ua-parser/uap-core) and combines results. Result has the full schema.
38 "},
39 },
40];
41
42static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
43 vec![
44 Parameter::required("value", kind::BYTES, "The string to parse."),
45 Parameter::optional(
46 "mode",
47 kind::BYTES,
48 "Determines performance and reliability characteristics.",
49 )
50 .default(&DEFAULT_MODE)
51 .enum_variants(MODE_ENUM),
52 ]
53});
54
/// Marker type implementing the `parse_user_agent` function.
#[derive(Debug, Clone, Copy)]
pub struct ParseUserAgent;
57
58impl Function for ParseUserAgent {
59 fn identifier(&self) -> &'static str {
60 "parse_user_agent"
61 }
62
63 fn summary(&self) -> &'static str {
64 "parse a user agent string"
65 }
66
67 fn usage(&self) -> &'static str {
68 indoc! {"
69 Parses the provided `value` as a user agent, which has
70 [a loosely defined format](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent).
71
72 Parses on the basis of best effort. Returned schema depends only on the configured `mode`,
73 so if the function fails to parse a field it will set it to `null`.
74 "}
75 }
76
77 fn category(&self) -> &'static str {
78 Category::Parse.as_ref()
79 }
80
81 fn return_kind(&self) -> u16 {
82 kind::OBJECT
83 }
84
85 fn notices(&self) -> &'static [&'static str] {
86 &[
87 indoc! {"
88 All values are returned as strings or as null. We recommend manually coercing values
89 to desired types as you see fit.
90 "},
91 "Different modes return different schema.",
92 "Field which were not parsed out are set as `null`.",
93 ]
94 }
95
96 fn parameters(&self) -> &'static [Parameter] {
97 PARAMETERS.as_slice()
98 }
99
100 fn examples(&self) -> &'static [Example] {
101 &[
102 example! {
103 title: "Fast mode",
104 source: indoc! {r#"
105 parse_user_agent(
106 "Mozilla Firefox 1.0.1 Mozilla/5.0 (X11; U; Linux i686; de-DE; rv:1.7.6) Gecko/20050223 Firefox/1.0.1"
107 )
108 "#},
109 result: Ok(indoc! {r#"
110 {
111 "browser": {
112 "family": "Firefox",
113 "version": "1.0.1"
114 },
115 "device": {
116 "category": "pc"
117 },
118 "os": {
119 "family": "Linux",
120 "version": null
121 }
122 }
123 "#}),
124 },
125 example! {
126 title: "Reliable mode",
127 source: indoc! {r#"
128 parse_user_agent(
129 "Mozilla/4.0 (compatible; MSIE 7.66; Windows NT 5.1; SV1; .NET CLR 1.1.4322)",
130 mode: "reliable")
131 "#},
132 result: Ok(indoc! {r#"
133 {
134 "browser": {
135 "family": "Internet Explorer",
136 "version": "7.66"
137 },
138 "device": {
139 "category": "pc"
140 },
141 "os": {
142 "family": "Windows XP",
143 "version": "NT 5.1"
144 }
145 }
146 "#}),
147 },
148 example! {
149 title: "Enriched mode",
150 source: indoc! {r#"
151 parse_user_agent(
152 "Opera/9.80 (J2ME/MIDP; Opera Mini/4.3.24214; iPhone; CPU iPhone OS 4_2_1 like Mac OS X; AppleWebKit/24.783; U; en) Presto/2.5.25 Version/10.54",
153 mode: "enriched"
154 )
155 "#},
156 result: Ok(indoc! {r#"
157 {
158 "browser": {
159 "family": "Opera Mini",
160 "major": "4",
161 "minor": "3",
162 "patch": "24214",
163 "version": "10.54"
164 },
165 "device": {
166 "brand": "Apple",
167 "category": "smartphone",
168 "family": "iPhone",
169 "model": "iPhone"
170 },
171 "os": {
172 "family": "iOS",
173 "major": "4",
174 "minor": "2",
175 "patch": "1",
176 "patch_minor": null,
177 "version": "4.2.1"
178 }
179 }
180 "#}),
181 },
182 ]
183 }
184
185 fn compile(
186 &self,
187 state: &state::TypeState,
188 _ctx: &mut FunctionCompileContext,
189 arguments: ArgumentList,
190 ) -> Compiled {
191 let value = arguments.required("value");
192
193 let mode = arguments
194 .optional_enum("mode", &Mode::all_value(), state)?
195 .unwrap_or_else(|| DEFAULT_MODE.clone())
196 .try_bytes_utf8_lossy()
197 .map(|s| Mode::from_str(&s).expect("validated enum"))
198 .expect("mode not bytes");
199
200 let parser = match mode {
201 Mode::Fast => {
202 let parser = WootheeParser::new();
203
204 Arc::new(move |s: &str| parser.parse_user_agent(s).partial_schema()) as Arc<_>
205 }
206 Mode::Reliable => {
207 let fast = WootheeParser::new();
208 let slow = &UA_EXTRACTOR;
209
210 Arc::new(move |s: &str| {
211 let ua = fast.parse_user_agent(s);
212 let ua = if ua.browser.family.is_none() || ua.os.family.is_none() {
213 let better_ua = slow.parse_user_agent(s);
214 better_ua.or(ua)
215 } else {
216 ua
217 };
218 ua.partial_schema()
219 }) as Arc<_>
220 }
221 Mode::Enriched => {
222 let fast = WootheeParser::new();
223 let slow = &UA_EXTRACTOR;
224
225 Arc::new(move |s: &str| {
226 slow.parse_user_agent(s)
227 .or(fast.parse_user_agent(s))
228 .full_schema()
229 }) as Arc<_>
230 }
231 };
232
233 Ok(ParseUserAgentFn {
234 value,
235 mode,
236 parser,
237 }
238 .as_expr())
239 }
240}
241
242#[derive(Clone)]
243struct ParseUserAgentFn {
244 value: Box<dyn Expression>,
245 mode: Mode,
246 parser: Arc<dyn Fn(&str) -> Value + Send + Sync>,
247}
248
249impl FunctionExpression for ParseUserAgentFn {
250 fn resolve(&self, ctx: &mut Context) -> Resolved {
251 let value = self.value.resolve(ctx)?;
252 let string = value.try_bytes_utf8_lossy()?;
253
254 Ok((self.parser)(&string))
255 }
256
257 fn type_def(&self, _: &state::TypeState) -> TypeDef {
258 self.mode.type_def()
259 }
260}
261
262impl fmt::Debug for ParseUserAgentFn {
263 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
264 write!(
265 f,
266 "ParseUserAgentFn{{ value: {:?}, mode: {:?}}}",
267 self.value, self.mode
268 )
269 }
270}
271
/// Parsing strategy selected through the `mode` argument.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub(crate) enum Mode {
    /// Woothee parser only; partial schema. This is the default.
    #[default]
    Fast,
    /// Woothee first, consulting the uap extractor when the browser or OS
    /// family is missing; partial schema.
    Reliable,
    /// Both parsers combined; full schema.
    Enriched,
}
279
280impl Mode {
281 fn all_value() -> Vec<Value> {
282 use Mode::{Enriched, Fast, Reliable};
283
284 vec![Fast, Reliable, Enriched]
285 .into_iter()
286 .map(|u| u.as_str().into())
287 .collect::<Vec<_>>()
288 }
289
290 const fn as_str(self) -> &'static str {
291 use Mode::{Enriched, Fast, Reliable};
292
293 match self {
294 Fast => "fast",
295 Reliable => "reliable",
296 Enriched => "enriched",
297 }
298 }
299
300 fn type_def(self) -> TypeDef {
301 match self {
302 Mode::Fast | Mode::Reliable => TypeDef::object(BTreeMap::from([
303 (
304 "browser".into(),
305 Kind::object(BTreeMap::from([
306 ("family".into(), Kind::bytes().or_null()),
307 ("version".into(), Kind::bytes().or_null()),
308 ])),
309 ),
310 (
311 "os".into(),
312 Kind::object(BTreeMap::from([
313 ("family".into(), Kind::bytes().or_null()),
314 ("version".into(), Kind::bytes().or_null()),
315 ])),
316 ),
317 (
318 "device".into(),
319 Kind::object(BTreeMap::from([(
320 "category".into(),
321 Kind::bytes().or_null(),
322 )])),
323 ),
324 ])),
325 Mode::Enriched => TypeDef::object(BTreeMap::from([
326 (
327 "browser".into(),
328 Kind::object(BTreeMap::from([
329 ("family".into(), Kind::bytes().or_null()),
330 ("version".into(), Kind::bytes().or_null()),
331 ("major".into(), Kind::bytes().or_null()),
332 ("minor".into(), Kind::bytes().or_null()),
333 ("patch".into(), Kind::bytes().or_null()),
334 ])),
335 ),
336 (
337 "os".into(),
338 Kind::object(BTreeMap::from([
339 ("family".into(), Kind::bytes().or_null()),
340 ("version".into(), Kind::bytes().or_null()),
341 ("major".into(), Kind::bytes().or_null()),
342 ("minor".into(), Kind::bytes().or_null()),
343 ("patch".into(), Kind::bytes().or_null()),
344 ("patch_minor".into(), Kind::bytes().or_null()),
345 ])),
346 ),
347 (
348 "device".into(),
349 Kind::object(BTreeMap::from([
350 ("family".into(), Kind::bytes().or_null()),
351 ("category".into(), Kind::bytes().or_null()),
352 ("brand".into(), Kind::bytes().or_null()),
353 ("model".into(), Kind::bytes().or_null()),
354 ])),
355 ),
356 ])),
357 }
358 }
359}
360
361impl FromStr for Mode {
362 type Err = &'static str;
363
364 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
365 use Mode::{Enriched, Fast, Reliable};
366
367 match s {
368 "fast" => Ok(Fast),
369 "reliable" => Ok(Reliable),
370 "enriched" => Ok(Enriched),
371 _ => Err("unknown mode variant"),
372 }
373 }
374}
375
376#[derive(Default)]
377struct UserAgent {
378 browser: Browser,
379 os: Os,
380 device: Device,
381}
382
383impl UserAgent {
384 fn partial_schema(self) -> Value {
385 let Self {
386 browser,
387 os,
388 device,
389 } = self;
390
391 IntoIterator::into_iter([
392 ("browser", browser.partial_schema()),
393 ("os", os.partial_schema()),
394 ("device", device.partial_schema()),
395 ])
396 .map(|(name, value)| (name.to_string(), value))
397 .collect()
398 }
399
400 fn full_schema(self) -> Value {
401 let Self {
402 browser,
403 os,
404 device,
405 } = self;
406
407 IntoIterator::into_iter([
408 ("browser", browser.full_schema()),
409 ("os", os.full_schema()),
410 ("device", device.full_schema()),
411 ])
412 .map(|(name, value)| (name.to_string(), value))
413 .collect()
414 }
415
416 fn or(self, other: Self) -> Self {
417 Self {
418 browser: self.browser.or(other.browser),
419 os: self.os.or(other.os),
420 device: self.device.or(other.device),
421 }
422 }
423}
424
/// Browser section of a parsed user agent; `None` means "not parsed out".
#[derive(Default)]
struct Browser {
    /// Browser name; part of both schemas.
    family: Option<String>,
    /// Version string; part of both schemas.
    version: Option<String>,
    /// Major version component; full schema only.
    major: Option<String>,
    /// Minor version component; full schema only.
    minor: Option<String>,
    /// Patch version component; full schema only.
    patch: Option<String>,
}
433
434impl Browser {
435 fn partial_schema(self) -> Value {
436 let Self {
437 family, version, ..
438 } = self;
439
440 into_value([("family", family), ("version", version)])
441 }
442
443 fn full_schema(self) -> Value {
444 let Self {
445 family,
446 version,
447 major,
448 minor,
449 patch,
450 } = self;
451
452 into_value([
453 ("family", family),
454 ("version", version),
455 ("major", major),
456 ("minor", minor),
457 ("patch", patch),
458 ])
459 }
460
461 fn or(self, other: Self) -> Self {
462 Self {
463 family: self.family.or(other.family),
464 version: self.version.or(other.version),
465 major: self.major.or(other.major),
466 minor: self.minor.or(other.minor),
467 patch: self.patch.or(other.patch),
468 }
469 }
470}
471
/// Operating system section of a parsed user agent; `None` means "not parsed out".
#[derive(Default)]
struct Os {
    /// Operating system name; part of both schemas.
    family: Option<String>,
    /// Version string; part of both schemas.
    version: Option<String>,
    /// Major version component; full schema only.
    major: Option<String>,
    /// Minor version component; full schema only.
    minor: Option<String>,
    /// Patch version component; full schema only.
    patch: Option<String>,
    /// Fourth version component; full schema only.
    patch_minor: Option<String>,
}
481
482impl Os {
483 fn partial_schema(self) -> Value {
484 let Self {
485 family, version, ..
486 } = self;
487
488 into_value([("family", family), ("version", version)])
489 }
490
491 fn full_schema(self) -> Value {
492 let Self {
493 family,
494 version,
495 major,
496 minor,
497 patch,
498 patch_minor,
499 } = self;
500
501 into_value([
502 ("family", family),
503 ("version", version),
504 ("major", major),
505 ("minor", minor),
506 ("patch", patch),
507 ("patch_minor", patch_minor),
508 ])
509 }
510
511 fn or(self, other: Self) -> Self {
512 Self {
513 family: self.family.or(other.family),
514 version: self.version.or(other.version),
515 major: self.major.or(other.major),
516 minor: self.minor.or(other.minor),
517 patch: self.patch.or(other.patch),
518 patch_minor: self.patch_minor.or(other.patch_minor),
519 }
520 }
521}
522
/// Device section of a parsed user agent; `None` means "not parsed out".
#[derive(Default)]
struct Device {
    /// Device family; full schema only.
    family: Option<String>,
    /// Device category (e.g. `pc`, `smartphone`); part of both schemas.
    category: Option<String>,
    /// Device brand; full schema only.
    brand: Option<String>,
    /// Device model; full schema only.
    model: Option<String>,
}
530
531impl Device {
532 fn partial_schema(self) -> Value {
533 let Self { category, .. } = self;
534
535 into_value([("category", category)])
536 }
537
538 fn full_schema(self) -> Value {
539 let Self {
540 category,
541 family,
542 brand,
543 model,
544 } = self;
545
546 into_value([
547 ("category", category),
548 ("family", family),
549 ("brand", brand),
550 ("model", model),
551 ])
552 }
553
554 fn or(self, other: Self) -> Self {
555 Self {
556 category: self.category.or(other.category),
557 family: self.family.or(other.family),
558 brand: self.brand.or(other.brand),
559 model: self.model.or(other.model),
560 }
561 }
562}
563
564fn into_value<'a>(iter: impl IntoIterator<Item = (&'a str, Option<String>)>) -> Value {
565 iter.into_iter()
566 .map(|(name, value)| {
567 (
568 name.to_string(),
569 value.map_or(Value::Null, std::convert::Into::into),
570 )
571 })
572 .collect()
573}
574
575trait Parser {
576 fn parse_user_agent(&self, user_agent: &str) -> UserAgent;
577}
578
579impl Parser for WootheeParser {
580 fn parse_user_agent(&self, user_agent: &str) -> UserAgent {
581 fn unknown_to_none<'a>(s: impl Into<Cow<'a, str>>) -> Option<String> {
582 let cow = s.into();
583 match cow.as_ref() {
584 "" | woothee::woothee::VALUE_UNKNOWN => None,
585 _ => Some(cow.into_owned()),
586 }
587 }
588
589 let ua = self.parse(user_agent).unwrap_or_default();
590
591 UserAgent {
592 browser: Browser {
593 family: unknown_to_none(ua.name),
594 version: unknown_to_none(ua.version),
595 ..Default::default()
596 },
597 os: Os {
598 family: unknown_to_none(ua.os),
599 version: unknown_to_none(ua.os_version),
600 ..Default::default()
601 },
602 device: Device {
603 category: unknown_to_none(ua.category),
604 ..Default::default()
605 },
606 }
607 }
608}
609
610impl Parser for ua_parser::Extractor<'_> {
611 fn parse_user_agent(&self, user_agent: &str) -> UserAgent {
612 let browser = self
613 .ua
614 .extract(user_agent)
615 .map(|ua| Browser {
616 family: Some(ua.family.into_owned()),
617 major: ua.major.map(Into::into),
618 minor: ua.minor.map(Into::into),
619 patch: ua.patch.map(Into::into),
620 ..Default::default()
621 })
622 .unwrap_or_default();
623
624 let os = self
625 .os
626 .extract(user_agent)
627 .map(|os| Os {
628 family: Some(os.os.into_owned()),
629 major: os.major.map(Cow::into_owned),
630 minor: os.minor.map(Cow::into_owned),
631 patch: os.patch.map(Cow::into_owned),
632 patch_minor: os.patch_minor.map(Cow::into_owned),
633 ..Default::default()
634 })
635 .unwrap_or_default();
636
637 let device = self
638 .dev
639 .extract(user_agent)
640 .map(|dev| Device {
641 family: Some(dev.device.into_owned()),
642 brand: dev.brand.map(Cow::into_owned),
643 model: dev.model.map(Cow::into_owned),
644 ..Default::default()
645 })
646 .unwrap_or_default();
647
648 UserAgent {
649 browser,
650 os,
651 device,
652 }
653 }
654}
655
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    test_function![
        parse_user_agent => ParseUserAgent;

        // No `mode` argument, so the default (`fast`) schema is expected.
        parses {
            args: func_args![value: "Mozilla/4.0 (compatible; MSIE 7.66; Windows NT 5.1; SV1)"],
            want: Ok(value!({
                browser: { family: "Internet Explorer", version: "7.66" },
                device: { category: "pc" },
                os: { family: "Windows XP", version: "NT 5.1" }
            })),
            tdef: Mode::Fast.type_def(),
        }

        // An agent neither parser recognises still yields the full schema,
        // with every field set to null.
        unknown_user_agent {
            args: func_args![value: "w3m/0.3", mode: "enriched"],
            want: Ok(value!({
                browser: { family: null, major: null, minor: null, patch: null, version: null },
                device: { brand: null, category: null, family: null, model: null },
                os: { family: null, major: null, minor: null, patch: null, patch_minor: null, version: null }
            })),
            tdef: Mode::Enriched.type_def(),
        }
    ];
}