sync

mountaineerbr · Nov 17, 2024 · 5f29583 · 5f29583
1 parent be4388e
commit 5f29583
Show file tree

Hide file tree

Showing 51 changed files with 34,730 additions and 0 deletions.
diff --git a/PMWMT/README.md b/PMWMT/README.md
@@ -0,0 +1,32 @@
+# Poor Man's Webmaster Tools
+
+![ScreenShot](logo_ssc.jpg)
+
+## Silly Software Company
+
+-=oOo=-
+
+
+    A poor man's way of doing things
+    is still a way to get things done 
+
+
+A poor man's website will often consist of a collection of static HTML files that get uploaded to a web server under an el cheapo internet account, and lack any of the tools associated with real web hosting and content management systems.
+
+The Silly Software Company fixes this problem by offering you the
+[Poor Man's Webmaster Tools](https://web.archive.org/web/20200215004706/http://users.telenet.be/mydotcom/howto/www/tools.htm),
+a collection of VBScript and/or Bash shell scripts to automate common (and less common) webmaster tasks.
+
+
+-------
+
+Some original scripts and my own modifications.
+
+<http://users.telenet.be/mydotcom/sillysof/index.htm>
+
+<http://users.telenet.be/mydotcom/howto/www/tools.htm>
+
+
+Wayback machine copy:
+
+<https://web.archive.org/web/20200215004706/http://users.telenet.be/mydotcom/sillysof/index.htm>
diff --git a/PMWMT/bulktext.sh b/PMWMT/bulktext.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+## bulktext.sh - append a text snippet to every page that does not have it yet
+#
+# 1. grep, recursive, list non-matching --> files with .htm/.html extension
+#    only --> list in 'targets.lst'
+# 2. review the list in an editor
+# 3. strip the closing body/html tags, append the snippet, re-add the tags
+#http://users.telenet.be/mydotcom/program/shell/textprocess.htm
+
+SITE_DIR="/home/me/website"	# tree to search
+MARKER="webstat"		# files lacking this text are the targets
+SNIPPET="srcfile"		# file holding the text to insert
+TARGETS="targets.lst"		# reviewable list of files to change
+
+## find files that don't have the marker text in them
+# the extension pattern is anchored to the end of the name; a bare ".htm"
+# would also select names such as "xhtml-notes.txt"
+grep -L -R -F -- "$MARKER" "$SITE_DIR" | grep '\.html\{0,1\}$' > "$TARGETS"
+
+## review and edit target list (remove files that don't need changing)
+vim "$TARGETS"
+
+# without the snippet the loop below would only rewrite the closing tags
+if [[ ! -r "$SNIPPET" ]]; then
+	echo "cannot read $SNIPPET" >&2
+	exit 1
+fi
+
+## read file list and process files therein
+# -r keeps backslashes, IFS= keeps leading/trailing blanks in file names
+while IFS= read -r filename ; do
+	# skip blank lines and entries that are not regular files
+	[[ -f "$filename" ]] || continue
+
+	# remove /body and /html tags so insertion doesn't fall outside html
+	# document body; the I flag (GNU sed) matches the tags in any letter case
+	sed -i -e 's/<\/html>//gI' -e 's/<\/body>//gI' -- "$filename"
+
+	# insert text from a file (eg the webstat counter script)
+	cat -- "$SNIPPET" >> "$filename"
+
+	# insert body and html end tags again
+	echo "  </body>" >> "$filename"
+	echo "</html>" >> "$filename"
+done < "$TARGETS"
+
diff --git a/PMWMT/deadlinks.sh b/PMWMT/deadlinks.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# (c) Koen Noens 2009
+#
+# Silly Software Productions  - Poor Man's Webmaster Tools
+#
+# find dead links in a set of html files
+# 	only checks the off-site links, identified by absolute url href
+#http://users.telenet.be/mydotcom/howto/www/deadlinks.html
+
+## PARAMS
+#directory for search
+LOCALDIR="websites/mydotcom"
+
+#file to list pages that contain dead links
+UPLOADFILE="websites/mydotcom/upload_after_fix"
+TODOLIST="websites/mydotcom/deadlinkslist.$( date +%F )"
+
+#scratch file holding one "file;url" record per hyperlink, removed on exit
+TMPFILE=$(mktemp) || exit 1
+trap 'rm -f -- "$TMPFILE"' EXIT
+
+#an anchor with an absolute http(s) href, and the url (plus closing quote) in it
+LINKRE='<a href="https\{0,1\}://.*>'
+URLRE='https\{0,1\}://[[:graph:]]*"'
+
+
+### Main -- rather verbose so we have progress indication
+echo -e "\n\n collecting hyperlinks in $LOCALDIR \n\n"
+
+# find files and hyperlinks
+# directories are skipped (grep cannot read them); "{} +" batches the files
+find "$LOCALDIR" ! -type d -exec grep -l -- "$LINKRE" {} + |
+  while IFS= read -r FILE ; do
+	#gradually reduce the matches until we have a clean url
+	grep -o -- "$LINKRE" "$FILE" |
+	grep -o -- "$URLRE" |
+	while IFS= read -r URL
+	do
+		URL=${URL%'"'}	# remove trailing double quote
+
+		#dump filenames and urls in tempfile for further processing
+		printf '%s;%s\n' "$FILE" "$URL" >> "$TMPFILE"
+	done
+  done
+
+
+echo -e "\n\n starting check for broken links ... \n\n"
+
+# split each record at the first ';' only, so a ';' inside the url survives
+sort -u "$TMPFILE" | while IFS=';' read -r FILE URL
+  do
+	echo -e "\n $FILE  - checking $URL \n"
+
+	if ! wget --spider "$URL"; then
+		# url not found
+		echo -e "ERROR retrieving  $URL \n\n"
+
+		#put file + url on todo-list, and put file on list to upload after fix
+		echo "$FILE" >> "$UPLOADFILE"
+		echo "$FILE  -  $URL" >> "$TODOLIST"
+	fi
+  done
+
+# sort and uniq the upload_file; it only exists when a dead link was recorded
+# (now or by an earlier run), and sort -o may overwrite its own input
+if [[ -e "$UPLOADFILE" ]]; then
+	sort -u -o "$UPLOADFILE" "$UPLOADFILE"
+fi
diff --git a/PMWMT/logo_ssc.jpg b/PMWMT/logo_ssc.jpg
diff --git a/PMWMT/sitemap1.sh b/PMWMT/sitemap1.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# script to create sitemap.txt
+# Koen Noens, October 2006
+#http://users.telenet.be/mydotcom/howto/www/sitemap.htm
+
+LOCAL_ROOT="/home/jp/websites/mysite"		# replace with your path
+SITE_ROOT="http://my.isp.com/my_site"		# replace with your site URL
+EXTENSIONS=".htm .html .php .asp .aspx .jsp"
+
+# all paths below are relative to LOCAL_ROOT; stop if we cannot get there,
+# otherwise sitemap.txt would be removed and rebuilt in the wrong directory
+cd "$LOCAL_ROOT" || { echo "cannot cd to $LOCAL_ROOT" >&2; exit 1; }
+
+if [[ -e sitemap.txt ]]; then
+	rm -- sitemap.txt
+else
+	echo "no previous sitemap found"
+fi
+
+# scratch files (paths found, urls built from them), removed on exit
+FOUNDFILES=$(mktemp) || exit 1
+URLS=$(mktemp) || exit 1
+trap 'rm -f -- "$FOUNDFILES" "$URLS"' EXIT
+
+#find all .htm, .html, .php, ... pages
+# EXTENSIONS is left unquoted on purpose: it is a space separated list
+for ext in $EXTENSIONS ; do
+	find . -name "*$ext" >> "$FOUNDFILES"
+done
+
+# remove leading . and insert site_root to build urls
+# (read line by line so a file name with blanks stays in one piece)
+while IFS= read -r FILE; do
+	printf '%s%s\n' "$SITE_ROOT" "${FILE#.}"
+done < "$FOUNDFILES" > "$URLS"
+
+
+# if there is an exclude list, exclude the files in it from the sitemap
+# each entry is used as a sed regex and must not contain a ','
+if [[ -e exclude.lst ]]; then
+	while read -r entry; do
+		# a blank line is no pattern at all; sed would reject it
+		[[ -n "$entry" ]] || continue
+		sed -i "s,$entry,,g" "$URLS"
+	done < exclude.lst
+	# remove blank lines as well
+	sed -i '/^$/d' "$URLS"
+fi
+
+# finishing touches
+sort -f -u "$URLS" >> sitemap.txt
+
+# add sitemap to files_to_upload
+echo "$LOCAL_ROOT/sitemap.txt" >> "$LOCAL_ROOT/upload"
diff --git a/PMWMT/sitemap2.sh b/PMWMT/sitemap2.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+# Koen Noens, December 2007
+# site map generator
+#
+# create indented list of hyperlinks to represent a directory listing of a web site ("sitemap")
+#http://users.telenet.be/mydotcom/howto/www/sitemap02.txt
+#http://users.telenet.be/mydotcom/upub/sitemap.htm
+
+## script global vars
+TARGET="/home/me/website"
+URLPRE="http://my.hosting.provider.com/mywebsite"
+SITEMAP="/home/me/website/sitemap.htm"
+
+EXT="htm"
+OUT=""
+SKIP="0"
+
+# constants for html tags
+HTMLTAB="&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
+
+## functions
+# countIndents PATH
+#	count depth of PATH in the directory tree; the count is handed back as
+#	the exit status, so the caller reads it from $? (which holds 0-255 only)
+function countIndents {
+	local COUNT=1
+	local STRING=$1
+	local PARENT
+
+	# dirname ends at "/" for an absolute and at "." for a relative path;
+	# testing for "/" alone would never finish on a relative path
+	PARENT=$(dirname -- "$STRING")
+	while [[ "$PARENT" != "/" && "$PARENT" != "." ]]; do
+		STRING=$PARENT
+		COUNT=$((COUNT + 1))
+		PARENT=$(dirname -- "$STRING")
+	done
+	return $COUNT
+}
+
+
+## main
+countIndents "$TARGET"
+SKIP=$?
+
+OUT=$(mktemp) || exit 1
+trap 'rm -f -- "$OUT"' EXIT
+[[ -e $SITEMAP ]] && rm -- "$SITEMAP"
+
+### list directories and pages in the order find walks the tree
+find "$TARGET" -name "*.$EXT" -o -type d >> "$OUT"
+
+echo "<html><head><title>sitemap</title></head><body>" >> "$SITEMAP"
+while IFS= read -r ENTRY ; do
+	countIndents "$ENTRY"
+	TABS=$(( $? - SKIP ))
+
+	# TABS + 1 indents per entry, so TARGET itself gets a single one
+	for (( i = 0; i <= TABS; i++ )); do
+		printf '%s' "$HTMLTAB" >> "$SITEMAP"
+	done
+
+	### replace local hierarchy with url-prefix; only the leading TARGET is
+	### swapped and it is taken literally, not as a regular expression
+	URL=${URLPRE}${ENTRY#"$TARGET"}
+	echo "<a href=\"$URL\">$(basename -- "$ENTRY")</a><br>" >> "$SITEMAP"
+
+	#progress
+	echo -n "."
+done < "$OUT"
+echo "</body></html>" >> "$SITEMAP"
+
+
+# cleanup (OUT is removed by the EXIT trap)
+echo
+exit
diff --git a/PMWMT/tkn-cnt.py b/PMWMT/tkn-cnt.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+# tkn-cnt.py - Count tokens of text string
+# Usage: tkn-cnt.py [MODEL|ENCODING] [TEXT|FILE|-]..
+# v0.1.6  april/2023  by mountaineerbr
+import os
+import sys
+import getopt
+try:
+    import tiktoken
+except ImportError:
+    sys.stderr.write("Err: Install tiktoken module: `pip install tiktoken`\n")
+    sys.exit(1)
+
+
+text = ""
+mod = "gpt-3.5-turbo"
+fallback = "cl100k_base"
+#davinci: r50k_base
+sn = (sys.argv[0].split("/")[-1])
+usage = "\
+Usage: %s [-ttv] [MODEL|ENCODING] \"[STRING|FILE|-]..\"\n\
+Usage: %s [-hl]\n\
+Set \"-\" to read from stdin.\n" % (sn, sn)
+
+
+def usagef():
+    """Write the usage text to stderr."""
+    sys.stderr.write(usage)
+
+def list_encf():
+    """Print every encoding name known to tiktoken, one per line."""
+    for enc_name in tiktoken.list_encoding_names():
+        print(enc_name)
+
+def read_filef(path):
+    """Return the text content of path; the file is closed before returning."""
+    with open(path, 'r') as fobj:
+        return fobj.read()
+
+
+#parse opts
+try:
+    opts, args = getopt.getopt((sys.argv[1:]), "hltv")
+except getopt.GetoptError:
+    #diagnostics go to stderr so they cannot end up in piped token output
+    sys.stderr.write('Error: Unknown option.\n')
+    sys.exit(2)
+
+#optt/optv count how often -t/-v were given; check and check_two record that
+#args[0] was taken as the model name without knowing whether it is one
+optt, optv, check, check_two = 0, 0, 0, 0
+for opt, arg in opts:
+    if opt == '-h':
+        usagef()
+        sys.exit()
+    elif opt == '-l':
+        list_encf()
+        sys.exit()
+    elif opt == '-t':
+        optt += 1
+    elif opt == '-v':
+        optv += 1
+
+
+#input, pos args or stdin
+if (len(args) > 1) and (args[1] == "-"):
+    text = sys.stdin.read()
+    mod = args[0]
+elif (len(args) > 1) and (args[0] == "-"):
+    text = sys.stdin.read()
+    mod = args[1]
+elif (len(args) > 1):
+    if (os.path.isfile(args[0])) or (os.path.isfile(args[1])):
+        #only the arguments that name existing files contribute text
+        for file in args:
+            if os.path.isfile(file):
+                text += read_filef(file)
+                if not optv:
+                    sys.stderr.write("File: %s\n" % file)
+    else:
+        text = " ".join(args[1:])
+    if not os.path.isfile(args[0]):
+        mod = args[0]
+        check = 1
+elif len(args):
+    if args[0] == "-":
+        text = sys.stdin.read()
+    elif os.path.isfile(args[0]):
+        text = read_filef(args[0])
+        if not optv:
+            sys.stderr.write("File: %s\n" % (args[0]))
+    else:
+        #a lone argument is tried as model name and kept as text as well
+        mod = args[0]
+        text = args[0]
+        check_two = 1
+else:
+    usagef()
+    sys.exit(2)
+
+#model / encoding
+#Exception instead of a bare except: Ctrl-C and sys.exit must not be
+#mistaken for "name not found" and silently trigger the next lookup
+try:
+    enc = tiktoken.encoding_for_model((mod[0:50]))
+    #sys.stderr.write("Model: %s %s\n" % (mod , str(enc)) )
+    if check_two:
+        #the lone argument was a model name, so there is no text to count
+        text = ""
+except Exception:
+    try:
+        enc = tiktoken.get_encoding((mod[0:50]))
+        #sys.stderr.write("Encoding: %s\n" % mod )
+        mod = ""
+    except Exception:
+        enc = tiktoken.get_encoding(fallback)
+        #sys.stderr.write("Warning: Model or encoding not found. Using %s.\n" % fallback)
+        if check:
+            #args[0] was neither model nor encoding: it belongs to the text
+            text = args[0] + " " + text
+
+#
+enc_name = str(enc)
+encoded_text = enc.encode_ordinary(text)
+#encoded_text = enc.encode(text, disallowed_special=())
+
+#-tt prints the text, -t the token ids, -v the bare count
+if optt > 1:
+    print(text)
+elif optt:
+    print(encoded_text)
+elif optv:
+    print(len(encoded_text))
+else:
+    print(len(encoded_text),enc_name)
+
+#https://github.com/openai/tiktoken/blob/main/tiktoken/core.py
+#https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
+#https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb