From fc3a1f184a908c076d342f92e65e53401dd5a4a3 Mon Sep 17 00:00:00 2001 From: Mike C Date: Sun, 25 Feb 2018 16:50:00 -0500 Subject: [PATCH] Update azure notes, readme, add download tool for caching source maps --- README.md | 80 ++++++++++++++++++++++++++++++++++++++++-------- azure_notes.md | 5 ++- download_maps.py | 78 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 150 insertions(+), 13 deletions(-) create mode 100755 download_maps.py diff --git a/README.md b/README.md index c79dc11..cde9868 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -Overview -= +# Overview + This project contains useful info/tools for processing OSM maps into the mapwriter file format (.map). It was developed to help generating offline map files for Locus on Android. -Dependencies / Prereqs -= +# Dependencies / Prereqs + To run this program you'll need the following * A Linux environment @@ -12,16 +12,16 @@ To run this program you'll need the following * wget installed * bunzip2 installed (usually part of bzip2 package) -Installation -= +# Installation + Head over to releases tab and download the latest. Extract the file and you're good to go. Everything is self-contained minus the above dependencies. -Running -= +# Running + To run the program, cd to the directory where you extracted the release and run './process_maps.py' with at least one '--map-list /path/to/list.txt parameter'. See the lists directory for examples on how to format the map list files. 
-Program Usage -= +### Program Usage + ``` usage: process_maps.py [-h] [--map-list MAP_LIST] [--no-sleep] [--use-ram] [--max-heap-space MAX_HEAP_SPACE] @@ -50,6 +50,62 @@ optional arguments: downloads using map lists ``` -Licencing -= +### Examples + +``` +./process_maps.py --max-heap-space 48g \ + --output-map-name central_america \ + --cached-maps-dir ~/osmmapdata/cache/central_america/20180225-1429 + +./process_maps.py --use-ram --max-heap-space 48g \ + --output-map-name midwest \ + --cached-maps-dir ~/osmmapdata/cache/midwest/20180225-1429 + +./process_maps.py --use-ram --max-heap-space 48g \ + --output-map-name south_america \ + --cached-maps-dir ~/osmmapdata/cache/south_america/20180225-1429 +``` + +# Caching maps for re-use / re-processing + +Included in the repo there is a ```download_maps.py``` script that is meant to help with downloading maps on a schedule or for caching. It uses the same logic as the main script but for download only. + +### Program usage + +``` +usage: download_maps.py [-h] [--map-list MAP_LIST] [--no-sleep] + [--output-map-name OUTPUT_MAP_NAME] + [--cached-maps-dir CACHED_MAPS_DIR] + +optional arguments: + -h, --help show this help message and exit + --map-list MAP_LIST a text file with one map URL per line, can be + specified more than once + --no-sleep don't sleep between downloads -- WARNING you can + easily run into throttling on mirrors if you use this + option + --output-map-name OUTPUT_MAP_NAME + set the name of the map directory before Ymd-HM + --cached-maps-dir CACHED_MAPS_DIR + The root directory where maps should be cached +``` + +### Example Usage + +``` +./download_maps.py --cached-maps-dir ./cache \ + --output-map-name central_america \ + --map-list lists/pbf/central_america.txt + +./download_maps.py --cached-maps-dir ./cache \ + --output-map-name midwest \ + --map-list lists/pbf/midwest.txt + +./download_maps.py --cached-maps-dir ./cache \ + --output-map-name south_america \ + --map-list lists/pbf/south_america.txt +``` + +# 
Licensing + All code is licensed Apache 2.0 and all non-code is licensed Creative Commons CC-BY-SA-3.0 diff --git a/azure_notes.md b/azure_notes.md index fa1d105..6d48171 100644 --- a/azure_notes.md +++ b/azure_notes.md @@ -78,8 +78,11 @@ cd osm_map_processing ``` ### Download maps +Download maps using lists to local cache (use a blob storage endpoint, these can get very large) + ``` -# Download relevant maps using upcoming download util to ~/osmmapdata/cache/blah/date-time +# Example, reformat to your needs/desires +./download_maps.py --map-list lists/test.txt --cached-maps-dir ./cache --output-map-name test ``` ### Midwest USA diff --git a/download_maps.py b/download_maps.py new file mode 100755 index 0000000..fa0df73 --- /dev/null +++ b/download_maps.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2016 Mike "KemoNine" Crosson +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Download the maps named in one or more map list files into a cache directory.

Each URL is fetched with wget into
``<cached-maps-dir>/<output-map-name>/<Ymd-HM timestamp>`` and any ``.bz2``
archives found there afterwards are decompressed with bunzip2.
"""

import argparse
import datetime
import os
import pprint
import subprocess
import sys
import time

base_path = os.path.dirname(os.path.realpath(__file__))
env = os.environ.copy()

# Kept under its historical name. subprocess.DEVNULL is used instead of
# open(os.devnull, 'w') so no file handle is held open for the whole process.
FNULL = subprocess.DEVNULL

wget_cmd = 'wget'
bunzip2_cmd = 'bunzip2'

# Pause between consecutive downloads, in seconds.
DOWNLOAD_SLEEP_SECONDS = 300


def read_map_lists(list_paths):
    """Return the map URLs listed in the given map list files.

    Args:
        list_paths: iterable of paths to text files with one map URL per line.

    Returns:
        A list of URLs in file order, with surrounding whitespace stripped.
        Blank lines are skipped so they never reach wget as empty URLs.

    Raises:
        OSError: if one of the list files cannot be opened.
    """
    urls = []
    for list_path in list_paths:
        with open(list_path, 'r') as maps:
            for line in maps:
                url = line.strip()
                if url:
                    urls.append(url)
    return urls


def build_cache_dir(cached_maps_root, output_map_name, timestamp):
    """Return the absolute download directory: root / map name / timestamp.

    ``output_map_name`` is left out of the path when it is None.
    """
    parts = [cached_maps_root]
    if output_map_name is not None:
        parts.append(output_map_name)
    parts.append(timestamp)
    return os.path.abspath(os.path.join(*parts))


def download_maps(urls, dest_dir, sleep_between=True):
    """Download every URL into ``dest_dir`` using wget.

    Args:
        urls: sequence of map URLs to fetch.
        dest_dir: directory handed to wget's ``-P`` option.
        sleep_between: when true, wait DOWNLOAD_SLEEP_SECONDS between
            consecutive downloads.

    Returns:
        The list of URLs for which wget exited with a non-zero status.
    """
    failed = []
    last_index = len(urls) - 1
    for index, url in enumerate(urls):
        print(' ' + url)
        result = subprocess.run([wget_cmd, '-P', dest_dir, url],
                                stdout=FNULL, stderr=subprocess.STDOUT)
        if result.returncode != 0:
            print(' WARNING: download failed (wget exit code {})'.format(result.returncode),
                  file=sys.stderr)
            failed.append(url)
        # There is nothing left to throttle after the final download.
        if sleep_between and index < last_index:
            print(' Sleeping to prevent throttle/blocking')
            time.sleep(DOWNLOAD_SLEEP_SECONDS)  # Seconds
    return failed


def decompress_maps(maps_dir):
    """Run bunzip2 on every ``.bz2`` file found under ``maps_dir``.

    Returns:
        The list of archive paths for which bunzip2 exited with a non-zero
        status.
    """
    failed = []
    for dirpath, _dirnames, filenames in os.walk(maps_dir):
        for filename in filenames:
            if not filename.endswith('.bz2'):
                continue
            print(' ' + filename)
            archive = os.path.join(dirpath, filename)
            result = subprocess.run([bunzip2_cmd, archive])
            if result.returncode != 0:
                print(' WARNING: decompression failed (bunzip2 exit code {})'.format(result.returncode),
                      file=sys.stderr)
                failed.append(archive)
    return failed


def main(argv=None):
    """Parse the command line, download the maps and decompress them.

    Args:
        argv: argument list to parse; None means ``sys.argv[1:]``.

    Returns:
        0 when every download and decompression succeeded, 1 otherwise.
        Exits with status 1 when a required option is missing.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M')

    parser = argparse.ArgumentParser()
    parser.add_argument('--map-list', action='append',
                        help='a text file with one map URL per line, can be specified more than once')
    parser.add_argument('--no-sleep', action='store_true',
                        help='don\'t sleep between downloads -- WARNING you can easily run into throttling on mirrors if you use this option')
    parser.add_argument('--output-map-name', action='store', default='output',
                        help='set the name of the map directory before Ymd-HM')
    parser.add_argument('--cached-maps-dir', action='store',
                        help='The root directory where maps should be cached')
    args = parser.parse_args(argv)

    if args.map_list is None or args.cached_maps_dir is None:
        print('You MUST specify at least one map-list AND cached-maps-dir')
        sys.exit(1)

    # Normalize map path directory based on CLI arg ahead of any path manipulations
    cached_maps_dir = build_cache_dir(args.cached_maps_dir, args.output_map_name,
                                      current_timestamp)

    print('Downloading maps to : ' + cached_maps_dir)

    try:
        map_list = read_map_lists(args.map_list)
    except OSError as err:
        print('Unable to read map list: {}'.format(err), file=sys.stderr)
        return 1

    print('Downloading maps')
    failed_downloads = download_maps(map_list, cached_maps_dir,
                                     sleep_between=not args.no_sleep)

    print('Decompressing maps (if necessary)')
    failed_archives = decompress_maps(cached_maps_dir)

    # Keep going on individual failures, but report them through the exit
    # status so scheduled runs can detect an incomplete cache.
    if failed_downloads or failed_archives:
        print('{} download(s) and {} decompression(s) failed'.format(
            len(failed_downloads), len(failed_archives)), file=sys.stderr)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())