feat: scrape paul's site

This commit is contained in:
Flinner 2021-08-18 17:57:52 +03:00
parent e93258dde1
commit 00e4620c81
Signed by: flinner
GPG Key ID: 95CE0DA7F0E58CA6
1 changed files with 48 additions and 0 deletions

48
bin/bin/paul.sh Executable file
View File

@ -0,0 +1,48 @@
#!/bin/sh
# Print an error message and abort the script.
# Arguments: $1 - message to display
# Outputs:   the message, on stderr (so it is never captured by $(...) or
#            mixed into the script's real output)
# Exits with status 1; never returns.
exit_m() {
  printf '%s\n' "$1" >&2
  exit 1
}
# curl with cache! (in "/tmp")
# Arguments: $1 - URL to fetch
# Outputs:   path of the cached copy on stdout (only on success)
# Returns:   0 if the cached file exists or was downloaded, non-zero otherwise
# The cache file is "/tmp/" followed by the URL with every "/" turned into "_".
curl_cache() {
  # An empty URL would map to the directory "/tmp/" itself.
  [ -n "$1" ] || return 1

  local cache_path tmp_path
  cache_path="/tmp/$(printf '%s' "$1" | tr '/' '_')"

  if [ ! -f "$cache_path" ]; then
    # Download next to the final name and rename afterwards, so an interrupted
    # or failed transfer can never be mistaken for a valid cache entry.
    tmp_path=$(mktemp "${cache_path}.XXXXXX") || return 1
    # -f: treat HTTP errors as failures instead of caching the error page.
    if ! curl -fsS "$1" -o "$tmp_path" || ! mv -f -- "$tmp_path" "$cache_path"; then
      rm -f -- "$tmp_path"
      return 1
    fi
  fi

  printf '%s\n' "$cache_path"
}
# Usage: paul.sh <problems-page-url> <question-number>
#   $1 - URL of the page that lists the practice problems
#   $2 - number of the question to show (numbering starts at 1)
# Prints the question text, a blank separator, then the solution paragraphs.

# Prefix joined with the href of a solution link to build its URL.
domain="https://tutorial.math.lamar.edu/"

# Validate the raw arguments before doing any work: once $1 has been through
# curl_cache and $2 through arithmetic, neither result can be empty, so
# checking the derived values would never catch a missing argument.
[ -n "$1" ] || exit_m "No Files Given"
[ -n "$2" ] || exit_m "No Question Number Given!"
[ "$#" -le 2 ] || exit_m "Incorrect number of args :("
case "$2" in
  # Rejects non-digits, zero, and leading zeros (which $(( )) reads as octal).
  0* | *[!0-9]*) exit_m "Question Number must be a positive integer" ;;
esac
q_num=$(($2 - 1)) # numbering starts at 1, jq array indices start at 0

# main file, that has all questions
if ! file=$(curl_cache "$1") || [ ! -f "$file" ]; then
  exit_m "Could not download $1"
fi

# One JSON array element per practice-problem <li>; parsed once, queried twice.
problems_json=$(pup 'body .practice-problems li json{}' <"$file")

# get the requested practice question (-r prints the raw string, keeping any
# double quotes that are part of the text)
question_text=$(printf '%s\n' "$problems_json" | jq -r ".[${q_num}].text")

# get the matching solution *link*
solution_path=$(printf '%s\n' "$problems_json" |
  jq -r ".[${q_num}].children[0].href")
case "$solution_path" in
  # jq prints "null" when the index is out of range or the item has no link.
  '' | null) exit_m "No solution link found for question $2" ;;
esac

# Join with exactly one slash regardless of how either side is written.
solution_url="${domain%/}/${solution_path#/}"

# ==== get solution =========
# download if not already downloaded
if ! solution_file=$(curl_cache "$solution_url") || [ ! -f "$solution_file" ]; then
  exit_m "Could not download $solution_url"
fi

printf '%s\n' "$question_text"
# Blank separator; printf because echo's handling of "\n" differs between shells.
printf '\n\n'
pup 'body .soln-content p text{}' <"$solution_file"