#!/bin/sh
#
# paul.sh - print one practice problem and its worked solution from
# Paul's Online Math Notes (https://tutorial.math.lamar.edu).
#
# Usage: paul.sh PAGE_URL QUESTION_NUMBER
#   PAGE_URL         URL of a page that lists practice problems
#   QUESTION_NUMBER  1-based position of the problem on that page
#
# Requires: curl, pup, jq.
#
# Downloaded pages are cached per user in
# ${XDG_CACHE_HOME:-$HOME/.cache}/paul.sh; delete that directory to force
# a fresh download.
#
# Provenance: added by commit 00e4620c ("feat: scrape paul's site").

set -eu

# Site root, without a trailing slash, so joining a path never yields "//".
readonly DOMAIN='https://tutorial.math.lamar.edu'

# Per-user cache directory: avoids predictable file names in world-writable
# /tmp.
readonly CACHE_DIR="${XDG_CACHE_HOME:-${HOME}/.cache}/paul.sh"

# exit_m MESSAGE...
# Print MESSAGE on stderr, prefixed with the script name, and exit with
# status 1.
exit_m() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# curl_cache URL
# Print the path of a local copy of URL, downloading it on a cache miss.
# Returns non-zero, leaving nothing in the cache, when the download fails.
#
# The body is a subshell "( ... )" so its variables and its EXIT trap stay
# private without relying on the non-POSIX `local`.
curl_cache() (
  url=$1
  # Flatten the URL into a single file name ("/" cannot appear in one).
  cache_file="${CACHE_DIR}/$(printf '%s' "$url" | tr '/' '_')"

  # -s rather than -f: an empty file left by an earlier failure is a miss.
  if [ ! -s "$cache_file" ]; then
    mkdir -p -- "$CACHE_DIR" || exit 1
    tmp_file=$(mktemp "${CACHE_DIR}/.download.XXXXXX") || exit 1
    trap 'rm -f -- "$tmp_file"' EXIT

    # -f: fail on HTTP errors so an error page is never cached.
    # -S: still report the reason although -s silences the progress meter.
    # -L: follow redirects.
    curl -fsSL -o "$tmp_file" "$url" || exit 1

    # Rename only after a complete download; a partial file never becomes
    # a cache entry.
    mv -f -- "$tmp_file" "$cache_file" || exit 1
  fi

  printf '%s\n' "$cache_file"
)

# resolve_href HREF
# Print an absolute URL for a link scraped from the site: absolute links are
# passed through, anything else is joined to DOMAIN with exactly one "/".
resolve_href() {
  case $1 in
    http://* | https://*)
      printf '%s\n' "$1"
      ;;
    *)
      printf '%s/%s\n' "$DOMAIN" "$(printf '%s' "$1" | sed 's|^/*||')"
      ;;
  esac
}

# main PAGE_URL QUESTION_NUMBER
# Validate the arguments, fetch (or reuse) the problem page, then print the
# selected question followed by the text of its solution page.
main() {
  # Validate before anything is used; a missing number must not silently
  # become index -1, which jq would resolve to the last problem.
  [ "$#" -eq 2 ] || exit_m "usage: ${0##*/} PAGE_URL QUESTION_NUMBER"

  page_url=$1
  question_number=$2

  case $page_url in
    '') exit_m "No Files Given" ;;
    -*) exit_m "page URL must not start with '-': $page_url" ;;
  esac

  # Digits only and no leading zero: rejects "", "0", "-3", "1.5", and
  # "08" (which shell arithmetic would read as invalid octal).
  case $question_number in
    '' | 0* | *[!0-9]*)
      exit_m "question number must be a positive integer: '$question_number'"
      ;;
  esac
  index=$((question_number - 1)) # jq arrays are 0-based

  for tool in curl pup jq; do
    command -v "$tool" >/dev/null 2>&1 ||
      exit_m "required tool not found: $tool"
  done

  page_file=$(curl_cache "$page_url") ||
    exit_m "could not download $page_url"

  # Parse the page once and keep only the selected <li> as compact JSON.
  # "// empty" turns an out-of-range index (null) into no output at all.
  problem=$(pup 'body .practice-problems li json{}' <"$page_file" |
    jq -c --argjson i "$index" '.[$i] // empty') ||
    exit_m "could not parse $page_url"
  [ -n "$problem" ] ||
    exit_m "no question number $question_number on $page_url"

  # -r prints the raw string: JSON escapes are decoded and quotation marks
  # that belong to the text survive.
  question_text=$(printf '%s\n' "$problem" | jq -r '.text // empty')
  solution_href=$(printf '%s\n' "$problem" |
    jq -r '.children[0].href // empty')
  [ -n "$solution_href" ] ||
    exit_m "question $question_number has no solution link"

  solution_url=$(resolve_href "$solution_href")
  solution_file=$(curl_cache "$solution_url") ||
    exit_m "could not download $solution_url"

  # Question, two blank lines, solution. printf is used because what
  # `echo "\n"` prints differs between shells.
  printf '%s\n\n\n' "$question_text"
  pup 'body .soln-content p text{}' <"$solution_file"
}

main "$@"