diff --git a/.gitignore b/.gitignore index dbaa546..87fb73a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ perf.data flamegraph.svg perf.data.old -/docker/logs/* \ No newline at end of file +/docker/logs/* +/downloaded \ No newline at end of file diff --git a/Crawler.toml b/Crawler.toml index d536db5..0048238 100644 --- a/Crawler.toml +++ b/Crawler.toml @@ -3,14 +3,9 @@ surreal_url = "localhost:8000" surreal_username = "root" surreal_password = "root" surreal_ns = "test" -surreal_db = "v1.17" - -# Minio config -s3_bucket = "v1.17" -s3_url = "http://localhost:9000" -s3_access_key = "Ok6s9uQEvKrqRoGZdacm" -s3_secret_key = "qubeSkP787c7QZu4TvtnuwPTGIAq6ETPupCxvv6K" +surreal_db = "v1.19.5" # Crawler config -crawl_filter = "en.wikipedia.com" -budget = 1000 +crawl_filter = "en.wikipedia.org" +start_url = "https://en.wikipedia.org" +budget = 100 diff --git a/README.md b/README.md index 6b2890c..7ec5c6d 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,43 @@ Crawls sites saving all the found links to a surrealdb database. It then proceeds to take batches of 100 uncrawled links until the crawl budget is reached. It saves the data of each site in a minio database. +## How to use + +1. Clone the repo and `cd` into it. +2. Build the repo with `cargo build -r` +3. Start the docker containers + 1. cd into the docker folder `cd docker` + 2. Bring up the docker containers `docker compose up -d` +4. From the project's root, edit the `Crawler.toml` file to your liking. +5. Run with `./target/release/internet_mapper` + +You can view stats of the project at `http://:3000/dashboards` + +```bash +# Untested script but probably works +git clone https://git.oliveratkinson.net/Oliver/internet_mapper.git +cd internet_mapper + +cargo build -r + +cd docker +docker compose up -d +cd .. + +$EDITOR Crawler.toml + +./target/release/internet_mapper + +``` + ### TODO -- [ ] Domain filtering - prevent the crawler from going on alternate versions of wikipedia. +- [x] Domain filtering - prevent the crawler from going on alternate versions of wikipedia. - [ ] Conditionally save content - based on filename or file contents - [x] GUI / TUI ? - Grafana - [x] Better asynchronous getting of the sites. Currently it all happens serially. 
-- [ ] Allow for storing asynchronously +- [x] Allow for storing asynchronously - dropping the "links to" logic fixes this need +- [x] Control crawler via config file (no recompilation needed) 3/17/25: Took >1hr to crawl 100 pages @@ -17,6 +47,8 @@ This meant we stored 1000 pages, 142,997 urls, and 1,425,798 links between the tw 3/20/25: Took 5min to crawl 1000 pages +3/21/25: Took 3min to crawl 1000 pages + # About ![Screenshot](/pngs/graphana.png) diff --git a/docker/compose.yml b/docker/compose.yml index 7cb6dcc..efe16f2 100644 --- a/docker/compose.yml +++ b/docker/compose.yml @@ -14,22 +14,6 @@ services: - --pass - root - rocksdb:/mydata/database.db - minio: - image: quay.io/minio/minio - ports: - - 9000:9000 - - 9001:9001 - environment: - - MINIO_ROOT_USER=root - - MINIO_ROOT_PASSWORD=an8charpassword - - MINIO_PROMETHEUS_AUTH_TYPE=public - volumes: - - minio_storage:/data - command: - - server - - /data - - --console-address - - ":9001" alloy: image: grafana/alloy:latest @@ -82,4 +66,3 @@ volumes: grafana_storage: alloy_storage: surrealdb_storage: - minio_storage: diff --git a/docker/prometheus.yaml b/docker/prometheus.yaml index cb43a89..ffc1e24 100644 --- a/docker/prometheus.yaml +++ b/docker/prometheus.yaml @@ -8,13 +8,10 @@ scrape_configs: # change this to your machine's ip, localhost won't work # because localhost refers to the docker container. - targets: ['172.20.239.48:2500'] + #- targets: ['192.168.8.209:2500'] - job_name: loki static_configs: - targets: ['loki:3100'] - job_name: prometheus static_configs: - targets: ['localhost:9090'] - - job_name: minio - metrics_path: /minio/v2/metrics/cluster - static_configs: - - targets: ['minio:9000'] diff --git a/src/db.rs b/src/db.rs index deceb27..06cddde 100644 --- a/src/db.rs +++ b/src/db.rs @@ -1,34 +1,20 @@ -use base64::{ - alphabet, - engine::{self, general_purpose}, - Engine, -}; use metrics::counter; -use serde::{ser::SerializeStruct, Deserialize, Serialize}; -use std::{fmt::Debug, sync::LazyLock, time::Instant}; +use std::fmt::Debug; +use serde::{Deserialize, Serialize}; use surrealdb::{ engine::remote::ws::{Client, Ws}, opt::auth::Root, sql::Thing, - Response, Surreal, + Surreal, }; -use tokio::sync::Mutex; -use tracing::{error, instrument, trace, warn}; +use tracing::{error, instrument, trace}; use url::Url; use crate::Config; -// static LOCK: LazyLock>> = LazyLock::new(|| Arc::new(Mutex::new(true))); -static LOCK: LazyLock> = LazyLock::new(|| Mutex::new(true)); - -const CUSTOM_ENGINE: engine::GeneralPurpose = - engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); - -const TIME_SPENT_ON_LOCK: &str = "surql_lock_waiting_ms"; const STORE: &str = "surql_store_calls"; -const LINK: &str = "surql_link_calls"; -#[derive(Deserialize, Clone)] +#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Hash)] pub struct Website { /// The url that this data is found at pub site: Url, @@ -36,18 +22,6 @@ pub struct Website { pub crawled: bool, } -impl Serialize for Website { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer { - let mut state = serializer.serialize_struct("Website", 2)?; - state.serialize_field("crawled", &self.crawled)?; - // to_string() calls the correct naming of site - state.serialize_field("site", &self.site.to_string())?; - state.end() - } -} - // manual impl to make tracing look nicer impl Debug for Website { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -73,82 +47,13 @@ impl Website { self.crawled = true } - pub fn get_url_as_string(site: &Url)
-> String { - let domain = match site.domain() { - Some(s) => s.to_string(), - None => { - warn!("Failed to get domain of URL: {}, falling back to 'localhost'", site.to_string()); - "localhost".to_string() - } - }; - let path = site.path(); - - domain + path - } - - pub fn get_url_as_b64_path(site: &Url) -> String { - let domain = site.domain().unwrap_or("DOMAIN").to_string(); - let path = &CUSTOM_ENGINE.encode(site.path()); - - domain + path - } - - #[instrument(skip_all)] - pub async fn links_to(&self, other: Vec, db: &Surreal) { - let len = other.len(); - if len == 0 { - return; - } - - let from = &self.site; - - // let to = other.site.to_string(); - trace!("Linking {} pages to {from}", other.len()); - counter!(LINK).increment(1); - match db - .query("COUNT(RELATE (SELECT id FROM website WHERE site = $in) -> links_to -> $out)") - .bind(("in", from.clone())) - .bind(("out", other)) - .await - { - Ok(mut e) => { - // The relate could technically "fail" (not relate anything), this just means that - // the query was ok. - let _: Response = e; - if let Ok(vec) = e.take(0) { - let _: Vec = vec; - if let Some(num) = vec.first() { - if *num == len { - trace!("Link for {from} OK - {num}/{len}"); - return; - } else { - error!("Didn't link all the records. {num}/{len}. Surreal response: {:?}", e); - return; - } - } - } - error!("Linking request succeeded but couldn't verify the results."); - } - Err(e) => { - error!("{}", e.to_string()); - } - } - } - // Insert every item in the vec into surreal, crawled state will be preserved as TRUE // if already in the database as such or incoming data is TRUE. + #[instrument(skip(db))] pub async fn store_all(all: Vec, db: &Surreal) -> Vec { counter!(STORE).increment(1); let mut things = Vec::with_capacity(all.len()); - // TODO this only allows for one thread to be in the database at a time. - // This is currently required since otherwise we get write errors. - // If the default `crawled` is set to false, we might not need to write any more - // than just the name. `accessed_at` is fun but not needed. - let now = Instant::now(); - let lock = LOCK.lock().await; - counter!(TIME_SPENT_ON_LOCK).increment(now.elapsed().as_millis() as u64); - match db .query( "INSERT INTO website $array @@ -169,7 +74,6 @@ impl Website { error!("{:?}", err); } } - drop(lock); things } } diff --git a/src/filesystem.rs b/src/filesystem.rs new file mode 100644 index 0000000..81ad057 --- /dev/null +++ b/src/filesystem.rs @@ -0,0 +1,60 @@ +use std::{ffi::OsStr, path::PathBuf}; + +use tokio::fs; +use tracing::{debug, error, instrument, trace, warn}; +use url::Url; + +#[instrument(skip(data))] +/// Returns whether or not the saved file should be parsed. +/// If the file is just data, like an image, it doesn't need to be parsed. +/// If it's html, then it does need to be parsed.
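As an illustration of the URL-to-path mapping that the `store` function below performs, here is a small self-contained sketch (not part of the patch): a last path segment with a file extension is written as that file, anything else becomes a directory holding an `index.html`. The `target_path` helper is hypothetical, and it skips the TLD check the real `valid_file_extension` does against the bundled list.

```rust
use std::path::PathBuf;
use url::Url;

// Hypothetical sketch mirroring filesystem::store's path logic; the real code
// additionally checks the extension against the TLD list before trusting it.
fn target_path(url: &Url) -> (PathBuf, bool) {
    let raw = PathBuf::from(format!(
        "./downloaded/{}{}",
        url.domain().unwrap_or("UnknownDomain"),
        url.path()
    ));
    if raw.extension().is_some() {
        // e.g. .../logo.png -> saved as-is, only parsed if it is an .html file
        let parse = raw.to_string_lossy().ends_with(".html");
        (raw, parse)
    } else {
        // e.g. .../wiki/Canada -> .../wiki/Canada/index.html, parsed for links
        (raw.join("index.html"), true)
    }
}

fn main() {
    let page = Url::parse("https://en.wikipedia.org/wiki/Canada").unwrap();
    let img = Url::parse("https://en.wikipedia.org/static/images/logo.png").unwrap();
    println!("{:?}", target_path(&page)); // ("./downloaded/en.wikipedia.org/wiki/Canada/index.html", true)
    println!("{:?}", target_path(&img));  // ("./downloaded/en.wikipedia.org/static/images/logo.png", false)
}
```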
+pub async fn store(data: &str, url: &Url) -> bool { + // extract data from url to save it accurately + let url_path = PathBuf::from("./downloaded/".to_string() + url.domain().unwrap_or("UnknownDomain") + url.path()); + + // if it's a file + let (basepath, filename) = if url_path.extension().filter(valid_file_extension).is_some() { + // get everything up till the file + let basepath = url_path.ancestors().skip(1).take(1).collect::(); + // get the file name + let filename = url_path.file_name().expect("This should exist").to_string_lossy(); + trace!("Save path: {:?} and base path: {:?}", &url_path, &basepath); + (basepath, filename.to_string()) + } else { + (url_path.clone(), "index.html".into()) + }; + + let should_parse = filename.ends_with(".html"); + + debug!("Writing at: {:?} {:?}", basepath, filename); + + // create the folders + if let Err(err) = fs::create_dir_all(&basepath).await { + error!("Dir creation: {err} {:?}", basepath); + } else { + if let Err(err) = fs::write(&basepath.join(filename), data).await { + error!("File creation: {err} {:?}", url_path); + } + } + + should_parse +} + +fn valid_file_extension(take: &&OsStr) -> bool { + let los = take.to_string_lossy(); + let all = los.split('.'); + match all.last() { + Some(s) => { + // FIXME it's worth noting that the dumb tlds like .zip are in here, + // which could cause problems + let all_domains = include_str!("tlds-alpha-by-domain.txt"); + + // check if it is a domain + match all_domains.lines().map(str::to_lowercase).find(|x| x==s.to_lowercase().as_str()) { + Some(_) => false, + None => true + } + }, + None => false, + } +} diff --git a/src/main.rs b/src/main.rs index fcdd10b..36ecb59 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,30 +3,29 @@ extern crate html5ever; use std::{ - fs::File, - io::Read, - net::{IpAddr, Ipv4Addr}, + collections::HashSet, fs::File, io::Read, net::{IpAddr, Ipv4Addr} }; use db::{connect, Website}; use metrics::{counter, gauge}; use metrics_exporter_prometheus::PrometheusBuilder; -use s3::S3; use serde::Deserialize; use surrealdb::{engine::remote::ws::Client, Surreal}; use tokio::task::JoinSet; -use tracing::{debug, error, info, instrument, trace, trace_span}; +use tracing::{debug, error, info, instrument, level_filters::LevelFilter, trace, trace_span}; use tracing_subscriber::{fmt, layer::SubscriberExt, EnvFilter, Layer, Registry}; mod db; mod parser; -mod s3; +mod filesystem; const GET_METRIC: &str = "total_gets"; const GET_IN_FLIGHT: &str = "gets_in_flight"; const SITES_CRAWLED: &str = "pages_crawled"; const BEING_PROCESSED: &str = "pages_being_processed"; +const BATCH_SIZE: usize = 2; + #[derive(Deserialize)] struct Config { surreal_ns: String, @@ -35,24 +34,24 @@ struct Config { surreal_username: String, surreal_password: String, - s3_url: String, - s3_bucket: String, - s3_access_key: String, - s3_secret_key: String, - crawl_filter: String, + start_url: String, budget: usize, } #[tokio::main] async fn main() { + println!("Logs and metrics are provided to the Grafana dashboard"); + let writer = std::fs::OpenOptions::new() .append(true) .create(true) .open("./docker/logs/tracing.log") .expect("Couldn't make log file!"); - let filter = EnvFilter::from_default_env(); + let filter = EnvFilter::builder() + .with_default_directive(LevelFilter::DEBUG.into()) + .from_env_lossy(); let registry = Registry::default().with( fmt::layer() @@ -75,9 +74,8 @@ async fn main() { .install() .expect("failed to install recorder/exporter"); - debug!("Starting..."); - // Would probably take these in as parameters 
from a cli - let starting_url = "https://en.wikipedia.org/"; + info!("Starting..."); + // When getting uncrawled pages, name must contain this variable. "" will effectively get ignored. // let crawl_filter = "en.wikipedia.org/"; // let budget = 50; @@ -88,13 +86,11 @@ async fn main() { let _ = file.read_to_string(&mut buf); let config: Config = toml::from_str(&buf).expect("Failed to parse Crawler.toml"); + let starting_url = &config.start_url; let db = connect(&config) .await .expect("Failed to connect to surreal, aborting."); - let s3 = S3::connect(&config) - .await - .expect("Failed to connect to minio, aborting.\n\nThis probably means you need to login to the minio console and get a new access key!\n\n(Probably here) http://localhost:9001/access-keys/new-account\n\n"); let reqwest = reqwest::Client::builder() // .use_rustls_tls() @@ -108,38 +104,27 @@ async fn main() { let pre_loop_span = span.enter(); // Download the site let site = Website::new(starting_url, false); - process(site, db.clone(), reqwest.clone(), s3.clone()).await; + process(site, db.clone(), reqwest.clone()).await; drop(pre_loop_span); let span = trace_span!("Loop"); let span = span.enter(); while crawled < config.budget { - let get_num = if config.budget - crawled < 100 { - config.budget - crawled - } else { - 100 - }; - - let uncrawled = get_uncrawled_links(&db, get_num, config.crawl_filter.clone()).await; + let uncrawled = get_uncrawled_links(&db, config.budget - crawled, config.crawl_filter.clone()).await; if uncrawled.is_empty() { info!("Had more budget but finished crawling everything."); return; } - debug!("Crawling {} pages...", uncrawled.len()); - - let span = trace_span!("Crawling"); - let _ = span.enter(); { let mut futures = JoinSet::new(); for site in uncrawled { gauge!(BEING_PROCESSED).increment(1); - futures.spawn(process(site, db.clone(), reqwest.clone(), s3.clone())); + futures.spawn(process(site, db.clone(), reqwest.clone())); // let percent = format!("{:.2}%", (crawled as f32 / budget as f32) * 100f32); // info!("Crawled {crawled} out of {budget} pages. ({percent})"); } - debug!("Joining {} futures...", futures.len()); let c = counter!(SITES_CRAWLED); // As futures complete runs code in while block @@ -152,13 +137,22 @@ async fn main() { } drop(span); - debug!("Done"); + if let Ok(mut ok) = db.query("count(select id from website where crawled = true)").await { + let res = ok.take::>(0); + if let Ok(i) = res { + if let Some(n) = i { + info!("Total crawled pages now equals {n}"); + } + } + } + + info!("Done"); } -#[instrument(skip(db, s3, reqwest))] +#[instrument(skip(db, reqwest))] /// Downloads and crawls and stores a webpage. /// It is acceptable to clone `db`, `reqwest`, and `s3` because they all use `Arc`s internally. - Noted by Oliver -async fn process(mut site: Website, db: Surreal, reqwest: reqwest::Client, s3: S3) { +async fn process(mut site: Website, db: Surreal, reqwest: reqwest::Client) { // METRICS trace!("Process: {}", &site.site); @@ -172,31 +166,44 @@ async fn process(mut site: Website, db: Surreal, reqwest: reqwest::Clien // Send the http request (get) if let Ok(response) = request_builder.send().await { - // METRICS - g.decrement(1); - counter!(GET_METRIC).increment(1); - + // TODO if this will fail if the object we are downloading is + // larger than the memory of the device it's running on. + // We should store it *as* we download it then parse it in-place. 
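A rough sketch of the streaming download that the TODO above describes, as an illustrative aside rather than part of this patch: write each chunk to disk as it arrives instead of buffering the whole body with `.text()`. It assumes a caller-chosen `dest` path and tokio's `fs`/`io-util` features; parsing would then read the file back (or tokenize incrementally) instead of reusing an in-memory `String`.

```rust
use std::path::Path;
use tokio::{fs::File, io::AsyncWriteExt};

// Stream the response body to disk chunk by chunk so memory use stays bounded.
// `dest` is a hypothetical target path picked by the caller.
async fn download_to_disk(
    client: &reqwest::Client,
    url: &url::Url,
    dest: &Path,
) -> Result<(), Box<dyn std::error::Error>> {
    let mut response = client.get(url.as_str()).send().await?;
    let mut file = File::create(dest).await?;
    while let Some(chunk) = response.chunk().await? {
        file.write_all(&chunk).await?;
    }
    file.flush().await?;
    Ok(())
}
```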
// Get body from response let data = response .text() .await .expect("Failed to read http response's body!"); - // Store document - s3.store(&data, &site.site).await; - // Parse document and get relationships - let sites = parser::parse(&site, &data).await; + // METRICS + g.decrement(1); + counter!(GET_METRIC).increment(1); + + // Store document + let should_parse = filesystem::store(&data, &site.site).await; + + if should_parse { + // Parse document and get relationships + let sites = parser::parse(&site, &data).await; + // De-duplicate this list + let prev_len = sites.len(); + let set = sites.into_iter().fold(HashSet::new(), |mut set,item| { + set.insert(item); + set + }); + let de_dupe_sites: Vec = set.into_iter().collect(); + let diff = prev_len - de_dupe_sites.len(); + trace!("Saved {diff} from being entered into the db by de-duping"); + + // Store all the other sites so that we can link to them. + let _ = Website::store_all(de_dupe_sites, &db).await; + } + // update self in db site.set_crawled(); Website::store_all(vec![site], &db).await; - // Store all the other sites so that we can link to them. - // let mut links_to = Vec::new(); - let _ = Website::store_all(sites, &db).await; - - // Make the database's links reflect the html links between sites - // site.links_to(others, &db).await; } else { error!("Failed to get: {}", &site.site); } @@ -209,9 +216,11 @@ async fn get_uncrawled_links( mut count: usize, filter: String, ) -> Vec { - if count > 100 { - count = 100 + + if count > BATCH_SIZE { + count = BATCH_SIZE; } + debug!("Getting uncrawled links"); let mut response = db diff --git a/src/parser.rs b/src/parser.rs index 3fc1276..c1e87e2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5,18 +5,21 @@ use html5ever::tokenizer::{BufferQueue, TokenizerResult}; use html5ever::tokenizer::{StartTag, TagToken}; use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts}; use html5ever::{local_name, tendril::*}; -use tracing::instrument; +use tracing::{debug, error, instrument, trace, warn}; +use url::Url; use crate::db::Website; impl TokenSink for Website { type Handle = Vec; + #[instrument(skip(token, _line_number))] fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult { match token { TagToken(tag) => { if tag.kind == StartTag { match tag.name { + // this should be all the html elements that have links local_name!("a") | local_name!("audio") | local_name!("area") @@ -31,23 +34,18 @@ impl TokenSink for Website { let attr_name = attr.name.local.to_string(); if attr_name == "src" || attr_name == "href" || attr_name == "data" { - // Get clone of the current site object - let mut web = self.clone(); + trace!("Found `{}` in html `{}` tag", &attr.value, tag.name); + let url = try_get_url(&self.site, &attr.value); - // Set url - let mut url = web.site; - url.set_fragment(None); // removes #xyz - let joined = url - .join(&attr.value) - .expect("Failed to join url during parsing!"); - web.site = joined; - - web.crawled = false; - - links.push(web); + if let Some(mut parsed) = url { + parsed.set_query(None); + parsed.set_fragment(None); + debug!("Final cleaned URL: `{}`", parsed.to_string()); + let web = Website::new(&parsed.to_string(), false); + links.push(web); + } } } - return TokenSinkResult::Script(links); } local_name!("button") | local_name!("meta") | local_name!("iframe") => { @@ -76,7 +74,7 @@ pub async fn parse(site: &Website, data: &str) -> Vec { token_buffer.push_back( chunk .try_reinterpret::() - .expect("Failed to reinterprt chunk!"), 
+ .expect("Failed to reinterpret chunk!"), ); // create the tokenizer let tokenizer = Tokenizer::new(site.clone(), TokenizerOpts::default()); @@ -92,3 +90,56 @@ pub async fn parse(site: &Website, data: &str) -> Vec { other_sites } +#[instrument] +fn try_get_url(parent: &Url, link: &str) -> Option { + match Url::parse(link) { + Ok(ok) => Some(ok), + Err(e) => { + if link.starts_with('#') { + trace!("Rejecting # url"); + None + } else if link.starts_with("//") { + // if a url starts with "//" is assumed that it will adopt + // the same scheme as it's parent + // https://stackoverflow.com/questions/9646407/two-forward-slashes-in-a-url-src-href-attribute + let scheme = parent.scheme(); + + match Url::parse(&format!("{scheme}://{}", link)) { + Ok(url) => Some(url), + Err(err) => { + error!("Failed parsing realative scheme url: {}", err); + None + } + } + } else { + // # This is some sort of realative url, gonna try patching it up into an absolute + // url + match e { + url::ParseError::RelativeUrlWithoutBase => { + // Is: scheme://host:port + let origin = parent.origin().ascii_serialization(); + let url = origin.clone() + link; + + trace!("Built `{url}` from `{origin} + {}`", link.to_string()); + + if let Ok(url) = Url::parse(&url) { + trace!("Saved relative url `{}` AS: `{}`", link, url); + Some(url) + } else { + error!( + "Failed to reconstruct a url from relative url: `{}` on site: `{}`", + link, + parent.to_string() + ); + None + } + } + _ => { + error!("MISC error: {:?} {:?}", e, link); + None + } + } + } + } + } +} diff --git a/src/s3.rs b/src/s3.rs deleted file mode 100644 index 4efc2b4..0000000 --- a/src/s3.rs +++ /dev/null @@ -1,100 +0,0 @@ -use metrics::counter; -use minio::s3::{ - args::{BucketExistsArgs, MakeBucketArgs}, - client::ClientBuilder, - creds::StaticProvider, - error::Error, - http::BaseUrl, - Client, -}; -use tracing::{instrument, trace, warn}; -use url::Url; - -use crate::{db::Website, Config}; - -const S3_ROUND_TRIP_METRIC: &str = "s3_trips"; - -#[derive(Clone)] -pub struct S3 { - bucket_name: String, - client: Client, -} - -impl S3 { - #[instrument(skip_all, name = "S3")] - pub async fn connect(config: &Config) -> Result { - let base_url = config - .s3_url - .parse::() - .expect("Failed to parse url into BaseUrl"); - - let static_provider = - StaticProvider::new(&config.s3_access_key, &config.s3_secret_key, None); - - let client = ClientBuilder::new(base_url) - .provider(Some(Box::new(static_provider))) - .build()?; - - trace!("Checking bucket..."); - let exists = client - .bucket_exists( - &BucketExistsArgs::new(&config.s3_bucket) - .expect("Failed to check if bucket exists"), - ) - .await?; - counter!(S3_ROUND_TRIP_METRIC).increment(1); - - if !exists { - trace!("Creating bucket..."); - client - .make_bucket( - &MakeBucketArgs::new(&config.s3_bucket).expect("Failed to create bucket!"), - ) - .await?; - } - counter!(S3_ROUND_TRIP_METRIC).increment(1); - - trace!("Connection successful"); - - Ok(Self { - bucket_name: config.s3_bucket.to_owned(), - client, - }) - } - - #[instrument(name = "s3_store", skip_all)] - pub async fn store(&self, data: &str, url: &Url) { - let counter = counter!(S3_ROUND_TRIP_METRIC); - - let filename = Website::get_url_as_string(url); - trace!("Storing {} as {filename}", url.to_string()); - - counter.increment(1); - match &self - .client - .put_object_content(&self.bucket_name, &filename, data.to_owned()) - .send() - .await - { - Ok(_) => {} - Err(err) => match err { - Error::InvalidObjectName(_) => { - // This code will really only run if 
the url has non-english chars - warn!("Tried storing invalid object name, retrying with Base64 encoding. Last try."); - - let filename: String = Website::get_url_as_b64_path(url); - - counter.increment(1); - let _ = &self - .client - .put_object_content(&self.bucket_name, &filename, data.to_owned()) - .send() - .await - .unwrap(); - } - _ => {} - }, - }; - } -} - diff --git a/src/tlds-alpha-by-domain.txt b/src/tlds-alpha-by-domain.txt new file mode 100644 index 0000000..327f649 --- /dev/null +++ b/src/tlds-alpha-by-domain.txt @@ -0,0 +1,1444 @@ +# Version 2025041500, Last Updated Tue Apr 15 07:07:01 2025 UTC +AAA +AARP +ABB +ABBOTT +ABBVIE +ABC +ABLE +ABOGADO +ABUDHABI +AC +ACADEMY +ACCENTURE +ACCOUNTANT +ACCOUNTANTS +ACO +ACTOR +AD +ADS +ADULT +AE +AEG +AERO +AETNA +AF +AFL +AFRICA +AG +AGAKHAN +AGENCY +AI +AIG +AIRBUS +AIRFORCE +AIRTEL +AKDN +AL +ALIBABA +ALIPAY +ALLFINANZ +ALLSTATE +ALLY +ALSACE +ALSTOM +AM +AMAZON +AMERICANEXPRESS +AMERICANFAMILY +AMEX +AMFAM +AMICA +AMSTERDAM +ANALYTICS +ANDROID +ANQUAN +ANZ +AO +AOL +APARTMENTS +APP +APPLE +AQ +AQUARELLE +AR +ARAB +ARAMCO +ARCHI +ARMY +ARPA +ART +ARTE +AS +ASDA +ASIA +ASSOCIATES +AT +ATHLETA +ATTORNEY +AU +AUCTION +AUDI +AUDIBLE +AUDIO +AUSPOST +AUTHOR +AUTO +AUTOS +AW +AWS +AX +AXA +AZ +AZURE +BA +BABY +BAIDU +BANAMEX +BAND +BANK +BAR +BARCELONA +BARCLAYCARD +BARCLAYS +BAREFOOT +BARGAINS +BASEBALL +BASKETBALL +BAUHAUS +BAYERN +BB +BBC +BBT +BBVA +BCG +BCN +BD +BE +BEATS +BEAUTY +BEER +BENTLEY +BERLIN +BEST +BESTBUY +BET +BF +BG +BH +BHARTI +BI +BIBLE +BID +BIKE +BING +BINGO +BIO +BIZ +BJ +BLACK +BLACKFRIDAY +BLOCKBUSTER +BLOG +BLOOMBERG +BLUE +BM +BMS +BMW +BN +BNPPARIBAS +BO +BOATS +BOEHRINGER +BOFA +BOM +BOND +BOO +BOOK +BOOKING +BOSCH +BOSTIK +BOSTON +BOT +BOUTIQUE +BOX +BR +BRADESCO +BRIDGESTONE +BROADWAY +BROKER +BROTHER +BRUSSELS +BS +BT +BUILD +BUILDERS +BUSINESS +BUY +BUZZ +BV +BW +BY +BZ +BZH +CA +CAB +CAFE +CAL +CALL +CALVINKLEIN +CAM +CAMERA +CAMP +CANON +CAPETOWN +CAPITAL +CAPITALONE +CAR +CARAVAN +CARDS +CARE +CAREER +CAREERS +CARS +CASA +CASE +CASH +CASINO +CAT +CATERING +CATHOLIC +CBA +CBN +CBRE +CC +CD +CENTER +CEO +CERN +CF +CFA +CFD +CG +CH +CHANEL +CHANNEL +CHARITY +CHASE +CHAT +CHEAP +CHINTAI +CHRISTMAS +CHROME +CHURCH +CI +CIPRIANI +CIRCLE +CISCO +CITADEL +CITI +CITIC +CITY +CK +CL +CLAIMS +CLEANING +CLICK +CLINIC +CLINIQUE +CLOTHING +CLOUD +CLUB +CLUBMED +CM +CN +CO +COACH +CODES +COFFEE +COLLEGE +COLOGNE +COM +COMMBANK +COMMUNITY +COMPANY +COMPARE +COMPUTER +COMSEC +CONDOS +CONSTRUCTION +CONSULTING +CONTACT +CONTRACTORS +COOKING +COOL +COOP +CORSICA +COUNTRY +COUPON +COUPONS +COURSES +CPA +CR +CREDIT +CREDITCARD +CREDITUNION +CRICKET +CROWN +CRS +CRUISE +CRUISES +CU +CUISINELLA +CV +CW +CX +CY +CYMRU +CYOU +CZ +DAD +DANCE +DATA +DATE +DATING +DATSUN +DAY +DCLK +DDS +DE +DEAL +DEALER +DEALS +DEGREE +DELIVERY +DELL +DELOITTE +DELTA +DEMOCRAT +DENTAL +DENTIST +DESI +DESIGN +DEV +DHL +DIAMONDS +DIET +DIGITAL +DIRECT +DIRECTORY +DISCOUNT +DISCOVER +DISH +DIY +DJ +DK +DM +DNP +DO +DOCS +DOCTOR +DOG +DOMAINS +DOT +DOWNLOAD +DRIVE +DTV +DUBAI +DUNLOP +DUPONT +DURBAN +DVAG +DVR +DZ +EARTH +EAT +EC +ECO +EDEKA +EDU +EDUCATION +EE +EG +EMAIL +EMERCK +ENERGY +ENGINEER +ENGINEERING +ENTERPRISES +EPSON +EQUIPMENT +ER +ERICSSON +ERNI +ES +ESQ +ESTATE +ET +EU +EUROVISION +EUS +EVENTS +EXCHANGE +EXPERT +EXPOSED +EXPRESS +EXTRASPACE +FAGE +FAIL +FAIRWINDS +FAITH +FAMILY +FAN +FANS +FARM +FARMERS +FASHION +FAST +FEDEX +FEEDBACK +FERRARI +FERRERO +FI +FIDELITY +FIDO +FILM +FINAL +FINANCE +FINANCIAL +FIRE +FIRESTONE +FIRMDALE 
+FISH +FISHING +FIT +FITNESS +FJ +FK +FLICKR +FLIGHTS +FLIR +FLORIST +FLOWERS +FLY +FM +FO +FOO +FOOD +FOOTBALL +FORD +FOREX +FORSALE +FORUM +FOUNDATION +FOX +FR +FREE +FRESENIUS +FRL +FROGANS +FRONTIER +FTR +FUJITSU +FUN +FUND +FURNITURE +FUTBOL +FYI +GA +GAL +GALLERY +GALLO +GALLUP +GAME +GAMES +GAP +GARDEN +GAY +GB +GBIZ +GD +GDN +GE +GEA +GENT +GENTING +GEORGE +GF +GG +GGEE +GH +GI +GIFT +GIFTS +GIVES +GIVING +GL +GLASS +GLE +GLOBAL +GLOBO +GM +GMAIL +GMBH +GMO +GMX +GN +GODADDY +GOLD +GOLDPOINT +GOLF +GOO +GOODYEAR +GOOG +GOOGLE +GOP +GOT +GOV +GP +GQ +GR +GRAINGER +GRAPHICS +GRATIS +GREEN +GRIPE +GROCERY +GROUP +GS +GT +GU +GUCCI +GUGE +GUIDE +GUITARS +GURU +GW +GY +HAIR +HAMBURG +HANGOUT +HAUS +HBO +HDFC +HDFCBANK +HEALTH +HEALTHCARE +HELP +HELSINKI +HERE +HERMES +HIPHOP +HISAMITSU +HITACHI +HIV +HK +HKT +HM +HN +HOCKEY +HOLDINGS +HOLIDAY +HOMEDEPOT +HOMEGOODS +HOMES +HOMESENSE +HONDA +HORSE +HOSPITAL +HOST +HOSTING +HOT +HOTELS +HOTMAIL +HOUSE +HOW +HR +HSBC +HT +HU +HUGHES +HYATT +HYUNDAI +IBM +ICBC +ICE +ICU +ID +IE +IEEE +IFM +IKANO +IL +IM +IMAMAT +IMDB +IMMO +IMMOBILIEN +IN +INC +INDUSTRIES +INFINITI +INFO +ING +INK +INSTITUTE +INSURANCE +INSURE +INT +INTERNATIONAL +INTUIT +INVESTMENTS +IO +IPIRANGA +IQ +IR +IRISH +IS +ISMAILI +IST +ISTANBUL +IT +ITAU +ITV +JAGUAR +JAVA +JCB +JE +JEEP +JETZT +JEWELRY +JIO +JLL +JM +JMP +JNJ +JO +JOBS +JOBURG +JOT +JOY +JP +JPMORGAN +JPRS +JUEGOS +JUNIPER +KAUFEN +KDDI +KE +KERRYHOTELS +KERRYPROPERTIES +KFH +KG +KH +KI +KIA +KIDS +KIM +KINDLE +KITCHEN +KIWI +KM +KN +KOELN +KOMATSU +KOSHER +KP +KPMG +KPN +KR +KRD +KRED +KUOKGROUP +KW +KY +KYOTO +KZ +LA +LACAIXA +LAMBORGHINI +LAMER +LANCASTER +LAND +LANDROVER +LANXESS +LASALLE +LAT +LATINO +LATROBE +LAW +LAWYER +LB +LC +LDS +LEASE +LECLERC +LEFRAK +LEGAL +LEGO +LEXUS +LGBT +LI +LIDL +LIFE +LIFEINSURANCE +LIFESTYLE +LIGHTING +LIKE +LILLY +LIMITED +LIMO +LINCOLN +LINK +LIVE +LIVING +LK +LLC +LLP +LOAN +LOANS +LOCKER +LOCUS +LOL +LONDON +LOTTE +LOTTO +LOVE +LPL +LPLFINANCIAL +LR +LS +LT +LTD +LTDA +LU +LUNDBECK +LUXE +LUXURY +LV +LY +MA +MADRID +MAIF +MAISON +MAKEUP +MAN +MANAGEMENT +MANGO +MAP +MARKET +MARKETING +MARKETS +MARRIOTT +MARSHALLS +MATTEL +MBA +MC +MCKINSEY +MD +ME +MED +MEDIA +MEET +MELBOURNE +MEME +MEMORIAL +MEN +MENU +MERCKMSD +MG +MH +MIAMI +MICROSOFT +MIL +MINI +MINT +MIT +MITSUBISHI +MK +ML +MLB +MLS +MM +MMA +MN +MO +MOBI +MOBILE +MODA +MOE +MOI +MOM +MONASH +MONEY +MONSTER +MORMON +MORTGAGE +MOSCOW +MOTO +MOTORCYCLES +MOV +MOVIE +MP +MQ +MR +MS +MSD +MT +MTN +MTR +MU +MUSEUM +MUSIC +MV +MW +MX +MY +MZ +NA +NAB +NAGOYA +NAME +NAVY +NBA +NC +NE +NEC +NET +NETBANK +NETFLIX +NETWORK +NEUSTAR +NEW +NEWS +NEXT +NEXTDIRECT +NEXUS +NF +NFL +NG +NGO +NHK +NI +NICO +NIKE +NIKON +NINJA +NISSAN +NISSAY +NL +NO +NOKIA +NORTON +NOW +NOWRUZ +NOWTV +NP +NR +NRA +NRW +NTT +NU +NYC +NZ +OBI +OBSERVER +OFFICE +OKINAWA +OLAYAN +OLAYANGROUP +OLLO +OM +OMEGA +ONE +ONG +ONL +ONLINE +OOO +OPEN +ORACLE +ORANGE +ORG +ORGANIC +ORIGINS +OSAKA +OTSUKA +OTT +OVH +PA +PAGE +PANASONIC +PARIS +PARS +PARTNERS +PARTS +PARTY +PAY +PCCW +PE +PET +PF +PFIZER +PG +PH +PHARMACY +PHD +PHILIPS +PHONE +PHOTO +PHOTOGRAPHY +PHOTOS +PHYSIO +PICS +PICTET +PICTURES +PID +PIN +PING +PINK +PIONEER +PIZZA +PK +PL +PLACE +PLAY +PLAYSTATION +PLUMBING +PLUS +PM +PN +PNC +POHL +POKER +POLITIE +PORN +POST +PR +PRAMERICA +PRAXI +PRESS +PRIME +PRO +PROD +PRODUCTIONS +PROF +PROGRESSIVE +PROMO +PROPERTIES +PROPERTY +PROTECTION +PRU +PRUDENTIAL +PS +PT +PUB +PW +PWC +PY +QA +QPON +QUEBEC +QUEST +RACING +RADIO +RE +READ +REALESTATE +REALTOR 
+REALTY +RECIPES +RED +REDSTONE +REDUMBRELLA +REHAB +REISE +REISEN +REIT +RELIANCE +REN +RENT +RENTALS +REPAIR +REPORT +REPUBLICAN +REST +RESTAURANT +REVIEW +REVIEWS +REXROTH +RICH +RICHARDLI +RICOH +RIL +RIO +RIP +RO +ROCKS +RODEO +ROGERS +ROOM +RS +RSVP +RU +RUGBY +RUHR +RUN +RW +RWE +RYUKYU +SA +SAARLAND +SAFE +SAFETY +SAKURA +SALE +SALON +SAMSCLUB +SAMSUNG +SANDVIK +SANDVIKCOROMANT +SANOFI +SAP +SARL +SAS +SAVE +SAXO +SB +SBI +SBS +SC +SCB +SCHAEFFLER +SCHMIDT +SCHOLARSHIPS +SCHOOL +SCHULE +SCHWARZ +SCIENCE +SCOT +SD +SE +SEARCH +SEAT +SECURE +SECURITY +SEEK +SELECT +SENER +SERVICES +SEVEN +SEW +SEX +SEXY +SFR +SG +SH +SHANGRILA +SHARP +SHELL +SHIA +SHIKSHA +SHOES +SHOP +SHOPPING +SHOUJI +SHOW +SI +SILK +SINA +SINGLES +SITE +SJ +SK +SKI +SKIN +SKY +SKYPE +SL +SLING +SM +SMART +SMILE +SN +SNCF +SO +SOCCER +SOCIAL +SOFTBANK +SOFTWARE +SOHU +SOLAR +SOLUTIONS +SONG +SONY +SOY +SPA +SPACE +SPORT +SPOT +SR +SRL +SS +ST +STADA +STAPLES +STAR +STATEBANK +STATEFARM +STC +STCGROUP +STOCKHOLM +STORAGE +STORE +STREAM +STUDIO +STUDY +STYLE +SU +SUCKS +SUPPLIES +SUPPLY +SUPPORT +SURF +SURGERY +SUZUKI +SV +SWATCH +SWISS +SX +SY +SYDNEY +SYSTEMS +SZ +TAB +TAIPEI +TALK +TAOBAO +TARGET +TATAMOTORS +TATAR +TATTOO +TAX +TAXI +TC +TCI +TD +TDK +TEAM +TECH +TECHNOLOGY +TEL +TEMASEK +TENNIS +TEVA +TF +TG +TH +THD +THEATER +THEATRE +TIAA +TICKETS +TIENDA +TIPS +TIRES +TIROL +TJ +TJMAXX +TJX +TK +TKMAXX +TL +TM +TMALL +TN +TO +TODAY +TOKYO +TOOLS +TOP +TORAY +TOSHIBA +TOTAL +TOURS +TOWN +TOYOTA +TOYS +TR +TRADE +TRADING +TRAINING +TRAVEL +TRAVELERS +TRAVELERSINSURANCE +TRUST +TRV +TT +TUBE +TUI +TUNES +TUSHU +TV +TVS +TW +TZ +UA +UBANK +UBS +UG +UK +UNICOM +UNIVERSITY +UNO +UOL +UPS +US +UY +UZ +VA +VACATIONS +VANA +VANGUARD +VC +VE +VEGAS +VENTURES +VERISIGN +VERSICHERUNG +VET +VG +VI +VIAJES +VIDEO +VIG +VIKING +VILLAS +VIN +VIP +VIRGIN +VISA +VISION +VIVA +VIVO +VLAANDEREN +VN +VODKA +VOLVO +VOTE +VOTING +VOTO +VOYAGE +VU +WALES +WALMART +WALTER +WANG +WANGGOU +WATCH +WATCHES +WEATHER +WEATHERCHANNEL +WEBCAM +WEBER +WEBSITE +WED +WEDDING +WEIBO +WEIR +WF +WHOSWHO +WIEN +WIKI +WILLIAMHILL +WIN +WINDOWS +WINE +WINNERS +WME +WOLTERSKLUWER +WOODSIDE +WORK +WORKS +WORLD +WOW +WS +WTC +WTF +XBOX +XEROX +XIHUAN +XIN +XN--11B4C3D +XN--1CK2E1B +XN--1QQW23A +XN--2SCRJ9C +XN--30RR7Y +XN--3BST00M +XN--3DS443G +XN--3E0B707E +XN--3HCRJ9C +XN--3PXU8K +XN--42C2D9A +XN--45BR5CYL +XN--45BRJ9C +XN--45Q11C +XN--4DBRK0CE +XN--4GBRIM +XN--54B7FTA0CC +XN--55QW42G +XN--55QX5D +XN--5SU34J936BGSG +XN--5TZM5G +XN--6FRZ82G +XN--6QQ986B3XL +XN--80ADXHKS +XN--80AO21A +XN--80AQECDR1A +XN--80ASEHDB +XN--80ASWG +XN--8Y0A063A +XN--90A3AC +XN--90AE +XN--90AIS +XN--9DBQ2A +XN--9ET52U +XN--9KRT00A +XN--B4W605FERD +XN--BCK1B9A5DRE4C +XN--C1AVG +XN--C2BR7G +XN--CCK2B3B +XN--CCKWCXETD +XN--CG4BKI +XN--CLCHC0EA0B2G2A9GCD +XN--CZR694B +XN--CZRS0T +XN--CZRU2D +XN--D1ACJ3B +XN--D1ALF +XN--E1A4C +XN--ECKVDTC9D +XN--EFVY88H +XN--FCT429K +XN--FHBEI +XN--FIQ228C5HS +XN--FIQ64B +XN--FIQS8S +XN--FIQZ9S +XN--FJQ720A +XN--FLW351E +XN--FPCRJ9C3D +XN--FZC2C9E2C +XN--FZYS8D69UVGM +XN--G2XX48C +XN--GCKR3F0F +XN--GECRJ9C +XN--GK3AT1E +XN--H2BREG3EVE +XN--H2BRJ9C +XN--H2BRJ9C8C +XN--HXT814E +XN--I1B6B1A6A2E +XN--IMR513N +XN--IO0A7I +XN--J1AEF +XN--J1AMH +XN--J6W193G +XN--JLQ480N2RG +XN--JVR189M +XN--KCRX77D1X4A +XN--KPRW13D +XN--KPRY57D +XN--KPUT3I +XN--L1ACC +XN--LGBBAT1AD8J +XN--MGB9AWBF +XN--MGBA3A3EJT +XN--MGBA3A4F16A +XN--MGBA7C0BBN0A +XN--MGBAAM7A8H +XN--MGBAB2BD +XN--MGBAH1A3HJKRD +XN--MGBAI9AZGQP6J +XN--MGBAYH7GPA +XN--MGBBH1A +XN--MGBBH1A71E 
+XN--MGBC0A9AZCG +XN--MGBCA7DZDO +XN--MGBCPQ6GPA1A +XN--MGBERP4A5D4AR +XN--MGBGU82A +XN--MGBI4ECEXP +XN--MGBPL2FH +XN--MGBT3DHD +XN--MGBTX2B +XN--MGBX4CD0AB +XN--MIX891F +XN--MK1BU44C +XN--MXTQ1M +XN--NGBC5AZD +XN--NGBE9E0A +XN--NGBRX +XN--NODE +XN--NQV7F +XN--NQV7FS00EMA +XN--NYQY26A +XN--O3CW4H +XN--OGBPF8FL +XN--OTU796D +XN--P1ACF +XN--P1AI +XN--PGBS0DH +XN--PSSY2U +XN--Q7CE6A +XN--Q9JYB4C +XN--QCKA1PMC +XN--QXA6A +XN--QXAM +XN--RHQV96G +XN--ROVU88B +XN--RVC1E0AM3E +XN--S9BRJ9C +XN--SES554G +XN--T60B56A +XN--TCKWE +XN--TIQ49XQYJ +XN--UNUP4Y +XN--VERMGENSBERATER-CTB +XN--VERMGENSBERATUNG-PWB +XN--VHQUV +XN--VUQ861B +XN--W4R85EL8FHU5DNRA +XN--W4RS40L +XN--WGBH1C +XN--WGBL6A +XN--XHQ521B +XN--XKC2AL3HYE2A +XN--XKC2DL3A5EE0H +XN--Y9A3AQ +XN--YFRO4I67O +XN--YGBI2AMMX +XN--ZFR164B +XXX +XYZ +YACHTS +YAHOO +YAMAXUN +YANDEX +YE +YODOBASHI +YOGA +YOKOHAMA +YOU +YOUTUBE +YT +YUN +ZA +ZAPPOS +ZARA +ZERO +ZIP +ZM +ZONE +ZUERICH +ZW
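The list above is read by `valid_file_extension` in `src/filesystem.rs` via `include_str!` and compared case-insensitively against a candidate extension. A minimal stand-alone sketch of that check follows; the helper name and the tiny inline list are illustrative stand-ins, not code from this patch.

```rust
// Minimal sketch of the check filesystem.rs performs against this list: if the
// "extension" of the last path segment is actually a TLD (e.g. "org"), the
// segment is treated as a page to crawl rather than a file to save verbatim.
fn is_real_file_extension(ext: &str, tlds: &str) -> bool {
    !tlds
        .lines()
        .map(str::to_lowercase)
        .any(|tld| tld == ext.to_lowercase())
}

fn main() {
    // Stand-in for include_str!("tlds-alpha-by-domain.txt")
    let tlds = "COM\nORG\nZIP\n";
    assert!(is_real_file_extension("png", tlds));  // ordinary file extension
    assert!(!is_real_file_extension("org", tlds)); // TLD, so treat as a page
    assert!(!is_real_file_extension("zip", tlds)); // the FIXME case: .zip is both a TLD and an archive extension
    println!("ok");
}
```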