diff --git a/Crawler.toml b/Crawler.toml index 8cb179b..01b0b3a 100644 --- a/Crawler.toml +++ b/Crawler.toml @@ -6,9 +6,7 @@ surreal_ns = "test" surreal_db = "v1.21.1" # Crawler config -# crawl_filter = "https://ftpgeoinfo.msl.mt.gov/Data/Spatial/MSDI/Imagery/2023_NAIP/UTM_County_Mosaics/" -crawl_filter = "https://oliveratkinson.net" -# start_url = "https://ftpgeoinfo.msl.mt.gov/Data/Spatial/MSDI/Imagery/2023_NAIP/UTM_County_Mosaics/" -start_url = "https://oliveratkinson.net" -budget = 1000 -batch_size = 500 +crawl_filter = "https://ftpgeoinfo.msl.mt.gov/Data/Spatial/MSDI" +start_url = "https://ftpgeoinfo.msl.mt.gov/Data/Spatial/MSDI" +budget = 10000 +batch_size = 50 diff --git a/docker/prometheus.yaml b/docker/prometheus.yaml index ffc1e24..111f2ff 100644 --- a/docker/prometheus.yaml +++ b/docker/prometheus.yaml @@ -7,7 +7,7 @@ scrape_configs: static_configs: # change this your machine's ip, localhost won't work # because localhost refers to the docker container. - - targets: ['172.20.239.48:2500'] + - targets: ['192.168.1.200:2500'] #- targets: ['192.168.8.209:2500'] - job_name: loki static_configs: diff --git a/src/main.rs b/src/main.rs index 289f351..72fe8bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -275,7 +275,7 @@ async fn get_uncrawled_links( count = config.batch_size; } - debug!("Getting uncrawled links"); + debug!("Getting {} uncrawled links", count); let mut response = db .query("SELECT * FROM website WHERE crawled = false AND site ~ type::string($format) LIMIT $count;")