use crate::util;
use bitflags::bitflags;
use caseless::Caseless;
use rusqlite::{
self,
types::{FromSql, FromSqlError, FromSqlResult, ToSql, ToSqlOutput, ValueRef},
};
use std::borrow::Cow;
/// Limit handed to `util::slice_up_to` when the URL and title are trimmed
/// before being searched (see `AutocompleteMatch::invoke`). Tags are searched
/// untrimmed.
// NOTE(review): the name says "chars"; whether `slice_up_to` counts chars or
// bytes is not visible from this file — confirm.
const MAX_CHARS_TO_SEARCH_THROUGH: usize = 255;
/// Strategy used to compare a search token against candidate text.
///
/// The explicit discriminants are the integers exchanged with SQLite by the
/// `ToSql`/`FromSql` impls for this type; keep them in sync with `FromSql`.
#[derive(Debug, Clone, Copy, PartialEq)]
#[repr(u32)]
pub enum MatchBehavior {
    /// Match the token at any position, case-insensitively (`find_anywhere`).
    Anywhere = 0,
    /// Matched with `find_on_boundary` in this file.
    BoundaryAnywhere = 1,
    /// Match the token only where it starts on a word boundary
    /// (`find_on_boundary`).
    Boundary = 2,
    /// Match the token only at the start of the text, case-insensitively
    /// (`find_beginning`).
    Beginning = 3,
    /// Like `Anywhere`, but the URL's scheme prefix is not stripped before
    /// searching.
    AnywhereUnmodified = 4,
    /// Match the token only at the start of the text, byte-for-byte
    /// (`find_beginning_case_sensitive`).
    BeginningCaseSensitive = 5,
}
impl FromSql for MatchBehavior {
    /// Decodes an integer column into the variant with the same discriminant.
    ///
    /// Propagates the error from `as_i64` when the column is not an integer,
    /// and returns `FromSqlError::InvalidType` for integers outside `0..=5`.
    #[inline]
    fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
        let behavior = match value.as_i64()? {
            0 => Self::Anywhere,
            1 => Self::BoundaryAnywhere,
            2 => Self::Boundary,
            3 => Self::Beginning,
            4 => Self::AnywhereUnmodified,
            5 => Self::BeginningCaseSensitive,
            // Any other integer has no corresponding variant.
            _ => return Err(FromSqlError::InvalidType),
        };
        Ok(behavior)
    }
}
impl ToSql for MatchBehavior {
    /// Encodes the variant as its `u32` discriminant.
    #[inline]
    fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
        let discriminant = *self as u32;
        Ok(ToSqlOutput::from(discriminant))
    }
}
bitflags! {
    /// Bit set controlling which records `AutocompleteMatch::invoke` accepts
    /// and which of their fields are searched.
    pub struct SearchBehavior: u32 {
        /// Filter criterion: the record has at least one visit.
        const HISTORY = 1 << 0;
        /// Filter criterion: the record is bookmarked.
        const BOOKMARK = 1 << 1;
        /// Filter criterion: the record has a non-empty tag string.
        const TAG = 1 << 2;
        /// Tokens are searched in the title (and tags).
        const TITLE = 1 << 3;
        /// Tokens are searched in the URL.
        const URL = 1 << 4;
        /// Filter criterion: the record is marked as typed.
        const TYPED = 1 << 5;
        /// Disables the `javascript:` URL filter in `invoke`.
        const JAVASCRIPT = 1 << 6;
        /// Filter criterion: the record has a non-zero open page count.
        const OPENPAGE = 1 << 7;
        /// When set, every requested filter criterion must hold; otherwise
        /// any single one is enough.
        const RESTRICT = 1 << 8;
        /// Not consulted by the matching code in this file.
        const SEARCHES = 1 << 9;
    }
}
impl Default for SearchBehavior {
fn default() -> SearchBehavior {
SearchBehavior::HISTORY
| SearchBehavior::BOOKMARK
| SearchBehavior::OPENPAGE
| SearchBehavior::SEARCHES
}
}
impl SearchBehavior {
#[inline]
pub fn any() -> Self {
SearchBehavior::all() & !SearchBehavior::RESTRICT
}
}
impl FromSql for SearchBehavior {
    /// Decodes a `u32` column into a flag set.
    ///
    /// Fails with `FromSqlError::InvalidType` when the value contains bits
    /// that do not correspond to any defined flag.
    #[inline]
    fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
        let bits = u32::column_result(value)?;
        match SearchBehavior::from_bits(bits) {
            Some(flags) => Ok(flags),
            None => Err(FromSqlError::InvalidType),
        }
    }
}
impl ToSql for SearchBehavior {
    /// Encodes the flag set as its raw `u32` bits.
    #[inline]
    fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
        let raw_bits = self.bits();
        Ok(ToSqlOutput::from(raw_bits))
    }
}
/// Sets bit 5 (`0x20`) of `c`.
///
/// For ASCII letters this yields the lowercase letter. For every other byte
/// the result is not a meaningful "lowercase" (hence "dubious"), so callers
/// must only use it where false positives are acceptable or the input is
/// known to be a letter.
#[inline(always)]
fn dubious_to_ascii_lower(c: u8) -> u8 {
    const ASCII_CASE_BIT: u8 = 0x20;
    c | ASCII_CASE_BIT
}
/// Returns the byte offset of the first position in `to_search` that *might*
/// begin a case-insensitive occurrence of `search_for`, or `None` if there is
/// no such position.
///
/// This is a cheap pre-filter that over-approximates: callers must verify the
/// candidate themselves.
///
/// * For a non-ASCII `search_for`, any byte with the high bit set is a
///   candidate.
/// * For an ASCII `search_for`, a byte is a candidate when it equals the
///   target after `dubious_to_ascii_lower`, or when it is the first UTF-8 byte
///   of one of the two non-ASCII code points whose lowercase form starts with
///   ASCII: U+0130 (`0xC4 0xB0`, lowercases to `i`) and U+212A
///   (`0xE2 0x84 0xAA`, lowercases to `k`).
#[inline(always)]
fn next_search_candidate(to_search: &str, search_for: char) -> Option<usize> {
    let haystack = to_search.as_bytes();
    if !search_for.is_ascii() {
        return haystack.iter().position(|&byte| byte & 0x80 != 0);
    }
    let wanted = dubious_to_ascii_lower(search_for as u8);
    let non_ascii_lead = match wanted {
        b'i' => 0xc4u8,
        b'k' => 0xe2u8,
        // 0xFF never occurs in valid UTF-8, so this disables the extra check.
        _ => 0xffu8,
    };
    haystack
        .iter()
        .position(|&byte| dubious_to_ascii_lower(byte) == wanted || byte == non_ascii_lead)
}
/// Returns `true` exactly when `c` is one of the bytes `b'a'..=b'z'`.
#[inline(always)]
fn is_ascii_lower_alpha(c: u8) -> bool {
    c.is_ascii_lowercase()
}
/// Decides whether the byte at `index` in `text` starts a "word".
///
/// Offset 0 is always a boundary. A byte that is not a lowercase ASCII letter
/// is always a boundary (this covers uppercase letters, digits, punctuation
/// and non-ASCII bytes). A lowercase ASCII letter is a boundary only when the
/// preceding byte is not an ASCII letter of either case.
///
/// Panics if `index` is out of range for `text`.
#[inline(always)]
fn is_on_boundary(text: &str, index: usize) -> bool {
    if index == 0 {
        return true;
    }
    let raw = text.as_bytes();
    let current = raw[index];
    if !is_ascii_lower_alpha(current) {
        return true;
    }
    // Fold the previous byte's case so that both `xa` and `Xa` count as
    // "inside a word" for the `a`.
    let previous = dubious_to_ascii_lower(raw[index - 1]);
    !is_ascii_lower_alpha(previous)
}
/// Returns `true` when `source` begins with `token`, comparing both strings
/// under Unicode default case folding.
///
/// An empty `token` matches any `source`.
///
/// There is deliberately no byte-length short-circuit here. Case folding can
/// change the encoded length of a string (for example U+212A KELVIN SIGN is
/// three bytes but folds to the one-byte `k`), so comparing the raw `len()` of
/// the inputs would reject valid matches. Running out of folded source
/// characters before the folded token is exhausted is what signals "too
/// short".
#[inline]
fn string_match(token: &str, source: &str) -> bool {
    let mut folded_source = source.chars().default_case_fold();
    token
        .chars()
        .default_case_fold()
        // `next()` yields `None` once the source is exhausted, which can never
        // equal `Some(expected)`, so a too-short source fails here.
        .all(|expected| folded_source.next() == Some(expected))
}
/// Returns the first code point of `c`'s lowercase mapping.
///
/// Any further code points in the mapping are discarded.
#[inline]
fn char_to_lower_single(c: char) -> char {
    let mut lowered = c.to_lowercase();
    lowered.next().unwrap()
}
/// Reads the first code point of `s`.
///
/// Returns that code point lowercased via `char_to_lower_single`, together
/// with the byte offset in `s` at which the following code point starts
/// (`s.len()` when there is none).
///
/// Panics if `s` is empty.
#[inline]
fn next_codepoint_lower(s: &str) -> (char, usize) {
    let first = s.chars().next().unwrap();
    // The second code point starts right after the first one's UTF-8 bytes.
    let width = first.len_utf8();
    (char_to_lower_single(first), width)
}
/// Case-insensitively searches `src` for an occurrence of `token`.
///
/// When `only_boundary` is `true`, an occurrence only counts if it starts at a
/// position accepted by `is_on_boundary`.
///
/// There is deliberately no `src.len() < token.len()` short-circuit.
/// Lowercasing and case folding can change a string's encoded length (for
/// example U+212A KELVIN SIGN is three bytes and matches the one-byte `k`), so
/// raw byte lengths cannot prove that a match is impossible. `string_match`
/// has the final say on every candidate.
///
/// # Panics
///
/// Panics if `token` is empty.
pub fn find_in_string(token: &str, src: &str, only_boundary: bool) -> bool {
    assert!(!token.is_empty(), "Don't search for an empty string");
    let token_first_char = next_codepoint_lower(token).0;
    let mut cur_offset = 0;
    // `next_search_candidate` returns an offset strictly inside the slice it
    // was given, so `cur_offset + src_idx` is always a valid index into `src`.
    while let Some(src_idx) = next_search_candidate(&src[cur_offset..], token_first_char) {
        cur_offset += src_idx;
        let src_cur = &src[cur_offset..];
        let (src_next_char, next_offset_in_cur) = next_codepoint_lower(src_cur);
        // The candidate scan over-approximates; confirm the first character
        // (cheap) before the boundary test and the full comparison.
        if src_next_char == token_first_char
            && (!only_boundary || is_on_boundary(src, cur_offset))
            && string_match(token, src_cur)
        {
            return true;
        }
        // Resume scanning after the code point that was just rejected.
        cur_offset += next_offset_in_cur;
    }
    false
}
/// Case-insensitive search for `token` at any position in `source`.
///
/// Panics if `token` is empty.
fn find_anywhere(token: &str, source: &str) -> bool {
    assert!(!token.is_empty(), "Don't search for an empty token");
    let only_boundary = false;
    find_in_string(token, source, only_boundary)
}
/// Case-insensitive search for `token` in `source`, accepting only
/// occurrences that start on a word boundary.
///
/// Panics if `token` is empty.
fn find_on_boundary(token: &str, source: &str) -> bool {
    assert!(!token.is_empty(), "Don't search for an empty token");
    let only_boundary = true;
    find_in_string(token, source, only_boundary)
}
/// Case-insensitive check that `source` starts with `token`.
///
/// Panics if `token` is empty.
fn find_beginning(token: &str, source: &str) -> bool {
    assert!(!token.is_empty(), "Don't search for an empty token");
    let is_prefix = string_match(token, source);
    is_prefix
}
/// Exact (case-sensitive) check that `source` starts with `token`.
///
/// Panics if `token` is empty.
fn find_beginning_case_sensitive(token: &str, source: &str) -> bool {
    assert!(!token.is_empty(), "Don't search for an empty token");
    // A `str` prefix test is exactly a prefix test on the UTF-8 bytes.
    let needle = token.as_bytes();
    source.as_bytes().starts_with(needle)
}
/// One candidate record plus the search parameters to test it against.
/// Call `invoke` to run the test.
pub struct AutocompleteMatch<'search, 'url, 'title, 'tags> {
/// The search string; `invoke` splits it on ASCII whitespace into tokens.
pub search_str: &'search str,
/// The candidate's URL.
pub url_str: &'url str,
/// The candidate's title.
pub title_str: &'title str,
/// The candidate's tags as a single string; empty means "no tags".
pub tags: &'tags str,
/// Visit count; a value above zero satisfies `SearchBehavior::HISTORY`.
pub visit_count: u32,
/// Satisfies `SearchBehavior::TYPED` when `true`.
pub typed: bool,
/// Satisfies `SearchBehavior::BOOKMARK` when `true`.
pub bookmarked: bool,
/// Open page count; a value above zero satisfies `SearchBehavior::OPENPAGE`.
pub open_page_count: u32,
/// Selects the token matcher (see `get_search_fn`).
pub match_behavior: MatchBehavior,
/// Selects the record filter and the fields that are searched.
pub search_behavior: SearchBehavior,
}
impl<'search, 'url, 'title, 'tags> AutocompleteMatch<'search, 'url, 'title, 'tags> {
    /// Picks the token matcher that corresponds to `self.match_behavior`.
    fn get_search_fn(&self) -> fn(&str, &str) -> bool {
        match self.match_behavior {
            MatchBehavior::Beginning => find_beginning,
            MatchBehavior::BeginningCaseSensitive => find_beginning_case_sensitive,
            MatchBehavior::Anywhere | MatchBehavior::AnywhereUnmodified => find_anywhere,
            MatchBehavior::BoundaryAnywhere | MatchBehavior::Boundary => find_on_boundary,
        }
    }

    /// Prepares a URL for searching.
    ///
    /// Unless the match behavior is `AnywhereUnmodified`, a leading
    /// `http://`, `https://` or `ftp://` is removed. If the remainder
    /// contains a `%`, it is percent-decoded; when the decoded bytes are not
    /// valid UTF-8 the undecoded text is returned instead.
    fn fixup_url_str<'a>(&self, mut s: &'a str) -> Cow<'a, str> {
        if self.match_behavior != MatchBehavior::AnywhereUnmodified {
            // The prefixes are mutually exclusive, so at most one applies.
            const SCHEMES: [&str; 3] = ["http://", "https://", "ftp://"];
            if let Some(scheme) = SCHEMES.iter().find(|scheme| s.starts_with(**scheme)) {
                s = &s[scheme.len()..];
            }
        }
        // Nothing to decode: hand back the slice without allocating.
        if memchr::memchr(b'%', s.as_bytes()).is_none() {
            return Cow::Borrowed(s);
        }
        percent_encoding::percent_decode(s.as_bytes())
            .decode_utf8()
            .unwrap_or(Cow::Borrowed(s))
    }

    /// Returns `true` when `self.search_behavior` shares at least one flag
    /// with `behavior`.
    #[inline]
    fn has_behavior(&self, behavior: SearchBehavior) -> bool {
        self.search_behavior.intersects(behavior)
    }

    /// Decides whether this record matches the search.
    ///
    /// The steps are, in order:
    /// 1. the `javascript:` URL filter;
    /// 2. the record filter (history / typed / bookmark / tag / open page),
    ///    which requires all requested criteria under `RESTRICT` and any one
    ///    of them otherwise;
    /// 3. every whitespace-separated token of `search_str` must be found in
    ///    the fields selected by the `TITLE` / `URL` flags.
    ///
    /// An empty (or all-whitespace) search string passes step 3 trivially.
    pub fn invoke(&self) -> bool {
        // NOTE(review): this filter only applies when the match behavior is
        // `AnywhereUnmodified`; confirm that `==` (rather than `!=`) is the
        // intended comparison.
        let filtered_javascript_url = self.match_behavior == MatchBehavior::AnywhereUnmodified
            && self.url_str.starts_with("javascript:")
            && !self.has_behavior(SearchBehavior::JAVASCRIPT)
            && !self.search_str.starts_with("javascript:");
        if filtered_javascript_url {
            return false;
        }

        // Each criterion pairs the flag that requests it with whether this
        // record satisfies it.
        let criteria = [
            (SearchBehavior::HISTORY, self.visit_count > 0),
            (SearchBehavior::TYPED, self.typed),
            (SearchBehavior::BOOKMARK, self.bookmarked),
            (SearchBehavior::TAG, !self.tags.is_empty()),
            (SearchBehavior::OPENPAGE, self.open_page_count > 0),
        ];
        let passes_filter = if self.has_behavior(SearchBehavior::RESTRICT) {
            criteria
                .iter()
                .all(|&(flag, satisfied)| !self.has_behavior(flag) || satisfied)
        } else {
            criteria
                .iter()
                .any(|&(flag, satisfied)| self.has_behavior(flag) && satisfied)
        };
        if !passes_filter {
            return false;
        }

        let fixed_url = self.fixup_url_str(self.url_str);
        let search_fn = self.get_search_fn();
        let trimmed_url = util::slice_up_to(fixed_url.as_ref(), MAX_CHARS_TO_SEARCH_THROUGH);
        let trimmed_title = util::slice_up_to(self.title_str, MAX_CHARS_TO_SEARCH_THROUGH);
        let wants_title = self.has_behavior(SearchBehavior::TITLE);
        let wants_url = self.has_behavior(SearchBehavior::URL);

        self.search_str.split_ascii_whitespace().all(|token| {
            // Tags are searched alongside the title and are not trimmed.
            let in_title_or_tags =
                || search_fn(token, trimmed_title) || search_fn(token, self.tags);
            let in_url = || search_fn(token, trimmed_url);
            match (wants_title, wants_url) {
                (true, true) => in_title_or_tags() && in_url(),
                (true, false) => in_title_or_tags(),
                (false, true) => in_url(),
                // No field restriction: a hit in any field is enough.
                (false, false) => in_url() || in_title_or_tags(),
            }
        })
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Checks `is_ascii_lower_alpha` against a plain range test for every
    /// possible byte value.
    #[test]
    fn test_is_ascii_lower_alpha() {
        for c in 0u8..=255u8 {
            let expected = (b'a'..=b'z').contains(&c);
            assert_eq!(
                is_ascii_lower_alpha(c),
                expected,
                "is_lower_ascii_alpha is wrong for {}",
                c
            );
        }
    }

    /// Verifies the facts about `char::to_lowercase` that the byte-level
    /// candidate scan relies on.
    #[test]
    fn test_casing_assumptions() {
        use std::char;
        // The only two non-ASCII code points expected to lowercase to
        // something that starts with an ASCII character.
        const DOTTED_CAPITAL_I: u32 = 304;
        const KELVIN_SIGN: u32 = 8490;

        for ch in (128u32..0x11_0000).filter_map(char::from_u32) {
            let code = ch as u32;
            let mut lowered = ch.to_lowercase();
            let first = lowered.next().unwrap();
            if code == DOTTED_CAPITAL_I || code == KELVIN_SIGN {
                assert!(
                    (first as u32) < 128,
                    "Lower case of non-ascii '{}' ({}) was unexpectedly not ascii",
                    ch,
                    code
                );
            } else {
                assert!(
                    (first as u32) >= 128,
                    "Lower case of non-ascii '{}' ({}) was unexpectedly ascii",
                    ch,
                    code
                );
                assert!(
                    lowered.next().is_none(),
                    "Lower case of '{}' ({}) produced multiple codepoints unexpectedly",
                    ch,
                    code
                );
            }
        }

        // ASCII must lowercase to exactly one ASCII code point.
        for code in 0u32..128 {
            let ch = char::from_u32(code).unwrap();
            let mut lowered = ch.to_lowercase();
            let first = lowered.next().unwrap();
            assert!(
                lowered.next().is_none() && (first as u32) < 128,
                "Lower case of ascii '{}' ({}) wasn't ascii :(",
                ch,
                code
            );
        }

        // For ASCII letters the bit trick must agree with real lowercasing.
        for c in (0u8..128).filter(|byte| byte.is_ascii_alphabetic()) {
            assert_eq!(
                dubious_to_ascii_lower(c),
                c.to_ascii_lowercase(),
                "c: '{}'",
                c as char
            );
        }
    }
}