-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
be4388e
commit 5f29583
Showing
51 changed files
with
34,730 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Poor Man's Webmaster Tools | ||
|
||
![ScreenShot](logo_ssc.jpg) | ||
|
||
## Silly Software Company | ||
|
||
-=oOo=- | ||
|
||
|
||
A poor man's way of doing things | ||
is still a way to get things done | ||
|
||
|
||
A poor man's website will often consist of a collection of static html files that get uploaded to a webserver under an el cheapo internet account, and lack any of the tools associated with real web hosting and content management systems. | ||
|
||
The Silly Software Company fixes this problem by offering you the | ||
[Poor Man's Webmaster Tools](https://web.archive.org/web/20200215004706/http://users.telenet.be/mydotcom/howto/www/tools.htm), | ||
a collection of VBScript and/or Bash shell scripts to automate common (and less common) webmaster tasks. | ||
|
||
|
||
------- | ||
|
||
Some original scripts and my own modifications. | ||
|
||
<http://users.telenet.be/mydotcom/sillysof/index.htm> | ||
|
||
<http://users.telenet.be/mydotcom/howto/www/tools.htm> | ||
|
||
|
||
Wayback machine copy: | ||
|
||
<https://web.archive.org/web/20200215004706/http://users.telenet.be/mydotcom/sillysof/index.htm> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#!/bin/bash
## Append a snippet (e.g. the 'webstat' counter script) to every .htm/.html
## page that does not mention 'webstat' yet.
##
## Reads:  ./srcfile      - the text to insert
## Writes: ./targets.lst  - list of pages to modify (editable before use)
# http://users.telenet.be/mydotcom/program/shell/textprocess.htm

# grep -L lists the files WITHOUT a match, -R recurses. The second grep keeps
# only names containing a literal ".htm" (which also covers ".html"); -F stops
# the dot from matching any character.
grep -L -R -- "webstat" /home/me/website | grep -F -- ".htm" > targets.lst

## review and edit target list (remove files that don't need changing)
vim targets.lst

## read file list and process files therein
while IFS= read -r filename; do
  # skip blank lines left behind by manual editing of the list
  [[ -n "$filename" ]] || continue

  # remove /body and /html tags at end of file so the insertion doesn't fall
  # outside the html document body
  sed -i \
    -e 's/<\/html>//g' \
    -e 's/<\/body>//g' \
    -e 's/<\/HTML>//g' \
    -e 's/<\/BODY>//g' \
    -- "$filename"

  # insert text from a file (eg the webstat counter script)
  cat srcfile >> "$filename"

  # insert body and html end tags again
  echo " </body>" >> "$filename"
  echo "</html>" >> "$filename"
done < targets.lst
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#!/bin/bash
# (c) Koen Noens 2009
#
# Silly Software Productions - Poor Man's Webmaster Tools
#
# find dead links in a set of html files
# only checks the off-site links, identified by absolute url href
# http://users.telenet.be/mydotcom/howto/www/deadlinks.html
#
# Outputs:
#   $UPLOADFILE - de-duplicated list of files that contain a dead link
#   $TODOLIST   - one "file - url" line per dead link

## PARAMS
#directory for search
LOCALDIR="websites/mydotcom"

#file to list pages that contain dead links
UPLOADFILE="websites/mydotcom/upload_after_fix"
TODOLIST="websites/mydotcom/deadlinkslist.$( date +%F )"

# scratch file holding "file;url" records; removed on every exit path
TMPFILE=$(mktemp) || exit 1
trap 'rm -f -- "$TMPFILE"' EXIT


### Main -- rather verbose so we have progress indication
echo -e "\n\n collecting hyperlinks in $LOCALDIR \n\n"

# find files and hyperlinks (directories are skipped so grep does not
# complain about them)
find "$LOCALDIR" ! -type d -exec grep -l "<a href=\"http://.*>" {} + |
  while IFS= read -r FILE; do
    # gradually reduce the matches until we have a clean url; the second
    # grep stops at the first double quote so trailing attributes are not
    # swallowed into the url
    grep -o "<a href=\"http://.*>" "$FILE" |
      grep -o 'http://[^"[:space:]]*"' |
      while IFS= read -r URL; do
        URL=${URL%'"'}   # remove trailing double quote

        # dump filenames and urls in tempfile for further processing
        printf '%s;%s\n' "$FILE" "$URL" >> "$TMPFILE"
      done
  done


echo -e "\n\n starting check for broken links ... \n\n"

sort -u "$TMPFILE" |
  while IFS= read -r RECORD; do
    [[ -n "$RECORD" ]] || continue
    # split on the FIRST ';' only, so urls that contain ';' stay intact
    FILE=${RECORD%%;*}
    URL=${RECORD#*;}

    printf '\n %s - checking %s \n\n' "$FILE" "$URL"

    if ! wget --spider "$URL"; then
      # url not found
      printf 'ERROR retrieving %s \n\n\n' "$URL"

      # put file + url on todo-list, and put file on list to upload after fix
      echo "$FILE" >> "$UPLOADFILE"
      echo "$FILE - $URL" >> "$TODOLIST"
    fi
  done

# sort and uniq the upload_file -- only when there is one; otherwise the
# scratch records would end up in it
if [[ -s "$UPLOADFILE" ]]; then
  sort -u -o "$UPLOADFILE" "$UPLOADFILE"
fi
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#!/bin/bash
# script to create sitemap.txt
# Koen Noens, October 2006
# http://users.telenet.be/mydotcom/howto/www/sitemap.htm
#
# Writes $LOCAL_ROOT/sitemap.txt (one URL per line) and appends its path to
# $LOCAL_ROOT/upload. An optional $LOCAL_ROOT/exclude.lst holds one pattern
# (basic regular expression) per line; every URL matching a pattern is left
# out of the sitemap.

LOCAL_ROOT="/home/jp/websites/mysite"   # replace with your path
SITE_ROOT="http://my.isp.com/my_site"   # replace with your site URL
EXTENSIONS=".htm .html .php .asp .aspx .jsp"

# everything below works on relative paths, so stop if we cannot get there
pushd "$LOCAL_ROOT" || exit 1

if [[ -e sitemap.txt ]]; then
  rm -- sitemap.txt
else
  echo "no previous sitemap found"
fi

FOUNDFILES=$(mktemp) || exit 1
# scratch files are removed on every exit path
trap 'rm -f -- "$FOUNDFILES" "$FOUNDFILES.0" "$FOUNDFILES.1"' EXIT

# find all .htm, .html, .php, ... pages
# ($EXTENSIONS is deliberately unquoted: it is a space-separated list)
for ext in $EXTENSIONS; do
  find . -name "*$ext" >> "$FOUNDFILES"
done

# remove the leading "." of find's "./path" output and insert site_root to
# build urls
while IFS= read -r FILE; do
  printf '%s%s\n' "$SITE_ROOT" "${FILE#.}"
done < "$FOUNDFILES" > "$FOUNDFILES.0"

# if there is an exclude list, exclude the files in it from the sitemap
if [[ -e exclude.lst ]]; then
  # trim the entries and drop blank ones (an empty pattern would match, and
  # therefore exclude, every url); the path list is no longer needed, so its
  # file is reused for the cleaned patterns
  sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e '/^$/d' \
    exclude.lst > "$FOUNDFILES"

  if [[ -s "$FOUNDFILES" ]]; then
    grep -v -f "$FOUNDFILES" -- "$FOUNDFILES.0" > "$FOUNDFILES.1"
    # grep status 1 only means "every url was excluded"; above 1 is an error
    if (( $? > 1 )); then
      echo "cannot apply exclude.lst" >&2
      exit 1
    fi
    mv -- "$FOUNDFILES.1" "$FOUNDFILES.0"
  fi
fi

# finishing touches
sort -f -u "$FOUNDFILES.0" >> sitemap.txt

# add sitemap to files_to_upload
echo "$LOCAL_ROOT/sitemap.txt" >> "$LOCAL_ROOT/upload"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#!/bin/bash | ||
# Koen Noens, December 2007 | ||
# site map generator | ||
# | ||
# create indented list of hyperlinks to represent a directory listing of a web site ("sitemap") | ||
#http://users.telenet.be/mydotcom/howto/www/sitemap02.txt | ||
#http://users.telenet.be/mydotcom/upub/sitemap.htm | ||
|
||
## script global vars
TARGET="/home/me/website"                          # local directory to map
URLPRE="http://my.hosting.provider.com/mywebsite"  # url that replaces $TARGET in the links
SITEMAP="/home/me/website/sitemap.htm"             # output file

EXT="htm"   # extension of the pages to list
OUT=""      # set to a temp file holding the find results
SKIP="0"    # set to the depth of $TARGET, subtracted from every entry's depth

# constants for html tags
# One indentation step. It has to be made of non-breaking-space entities: a
# plain space is collapsed by the browser, and is dropped by the shell when
# the variable is expanded unquoted.
HTMLTAB="&nbsp;&nbsp;&nbsp;&nbsp;"
|
||
## functions | ||
function countIndents {
  # Count the depth of a path in the directory tree.
  # Arguments: $1 - path (absolute or relative)
  # Returns:   the number of path components as the exit status, so the
  #            caller reads it from $? ("/a" -> 1, "/a/b" -> 2, "a/b" -> 2).
  #            An exit status wraps at 256, so deeper paths are not
  #            representable.
  local path=$1
  local depth=1
  local parent

  parent=$(dirname -- "$path")
  # dirname converges on "/" for absolute paths and on "." for relative or
  # empty ones; stopping on both keeps the loop from spinning forever
  while [[ "$parent" != "/" && "$parent" != "." ]]; do
    path=$parent
    depth=$((depth + 1))
    parent=$(dirname -- "$path")
  done
  return "$depth"
}
|
||
|
||
## main
# Build $SITEMAP: one hyperlink per directory and per *.$EXT file below
# $TARGET, indented by its depth relative to $TARGET.

# depth of the root itself; subtracted from every entry's depth below
countIndents "$TARGET"
SKIP=$?

OUT=$(mktemp) || exit 1
# the scratch file is removed on every exit path
trap 'rm -f -- "$OUT"' EXIT
rm -f -- "$SITEMAP"

### experiment with find and sort to get ordered output
#find $TARGET -type d -o -name "*.$EXT" >> $OUT
find "$TARGET" -name "*.$EXT" -o -type d >> "$OUT"
#sort -n -o $OUT $OUT

echo "<html><head><title>sitemap</title></head><body>" >> "$SITEMAP"
while IFS= read -r ENTRY; do
  # countIndents hands its result back as the exit status
  countIndents "$ENTRY"
  TABS=$(( $? - SKIP ))

  # TABS + 1 indentation steps per entry (0..TABS inclusive)
  for (( i = 0; i <= TABS; i++ )); do
    echo -n "$HTMLTAB" >> "$SITEMAP"
  done
  echo "<a href=\"$ENTRY\">$(basename -- "$ENTRY")</a><br>" >> "$SITEMAP"

  #progress
  echo -n "."
done < "$OUT"
echo "</body></html>" >> "$SITEMAP"

### replace local hierarchy with url-prefix
# NOTE: $TARGET is used as a regular expression here, so a "." in the path
# matches any character
sed -i "s,$TARGET,$URLPRE,g" "$SITEMAP"


# cleanup (the temp file is removed by the EXIT trap)
echo
exit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#!/usr/bin/env python | ||
# tkn-cnt.py - Count tokens of text string | ||
# Usage: tkn-cnt.py [MODEL|ENCODING] [TEXT|FILE|-].. | ||
# v0.1.6 april/2023 by mountaineerbr | ||
import os | ||
import sys | ||
import getopt | ||
try:
    import tiktoken
except ImportError:
    # tiktoken is a third-party package: tell the user how to get it rather
    # than dying with a traceback. Only ImportError is caught, so unrelated
    # failures (e.g. Ctrl-C during start-up) are not mislabelled.
    sys.stderr.write("Err: Install tiktoken module: `pip install tiktoken`\n")
    sys.exit(1)
|
||
|
||
# Defaults: the text to tokenise, the model name, and the encoding used when
# the model/encoding argument is not recognised.
text = ""
mod = "gpt-3.5-turbo"
fallback = "cl100k_base"
#davinci: r50k_base
# Script name shown in the usage text; os.path.basename honours the
# platform's path separator instead of assuming "/".
sn = os.path.basename(sys.argv[0])
usage = (
    "Usage: %s [-ttv] [MODEL|ENCODING] \"[STRING|FILE|-]..\"\n"
    "Usage: %s [-hl]\n"
    "Set \"-\" to read from stdin.\n" % (sn, sn)
)
|
||
|
||
def usagef():
    """Write the usage text to standard error."""
    err_stream = sys.stderr
    err_stream.write(usage)
|
||
def list_encf():
    """Print every encoding name reported by tiktoken, one per line."""
    available = tiktoken.list_encoding_names()
    for name in available:
        print(name)
|
||
|
||
#parse opts
# -h  print usage and exit        -l  list encoding names and exit
# -t  print the token list (given twice: print the text instead)
# -v  print only the token count, and do not echo file names
try:
    opts, args = getopt.getopt(sys.argv[1:], "hltv")
except getopt.GetoptError:
    # diagnostics go to stderr, like every other message of this script
    sys.stderr.write("Error: Unknown option.\n")
    sys.exit(2)

# optt / optv count how often -t / -v were given; check and check_two record
# that a positional argument was provisionally taken as the model name
optt, optv, check, check_two = 0, 0, 0, 0
for opt, arg in opts:
    if opt == '-h':
        usagef()
        sys.exit()
    elif opt == '-l':
        list_encf()
        sys.exit()
    elif opt == '-t':
        optt += 1
    elif opt == '-v':
        optv += 1
|
||
|
||
#input, pos args or stdin
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation; confirm it against the upstream script.
if (len(args) > 1) and (args[1] == "-"):
    # MODEL -
    text = sys.stdin.read()
    mod = args[0]
elif (len(args) > 1) and (args[0] == "-"):
    # - MODEL
    text = sys.stdin.read()
    mod = args[1]
elif len(args) > 1:
    if os.path.isfile(args[0]) or os.path.isfile(args[1]):
        # concatenate every argument that names an existing file
        for file in args:
            if os.path.isfile(file):
                # "with" closes the handle; the bare open().read() left it
                # to the garbage collector
                with open(file, 'r') as fh:
                    text += fh.read()
                if not optv:
                    sys.stderr.write("File: %s\n" % file)
    else:
        text = " ".join(args[1:])
    if not os.path.isfile(args[0]):
        # first argument is provisionally the model; "check" lets the
        # encoding lookup put it back in front of the text if it is not one
        mod = args[0]
        check = 1
elif len(args):
    if args[0] == "-":
        text = sys.stdin.read()
    elif os.path.isfile(args[0]):
        with open(args[0], 'r') as fh:
            text = fh.read()
        if not optv:
            sys.stderr.write("File: %s\n" % (args[0]))
    else:
        # a single word may be a model name or the text itself; keep it as
        # both and let the encoding lookup decide (check_two)
        mod = args[0]
        text = args[0]
        check_two = 1
else:
    usagef()
    sys.exit(2)
|
||
#model / encoding
# Resolution order: mod as a model name, then mod as an encoding name, then
# the fallback encoding. "except Exception" (rather than a bare except) keeps
# KeyboardInterrupt and SystemExit from being swallowed.
# NOTE(review): the nesting was reconstructed from a listing that had lost
# its indentation; confirm it against the upstream script.
try:
    enc = tiktoken.encoding_for_model((mod[0:50]))
    #sys.stderr.write("Model: %s %s\n" % (mod , str(enc)) )
    if check_two:
        # the single argument was a model name, so there is no text
        text = ""
except Exception:
    try:
        enc = tiktoken.get_encoding((mod[0:50]))
        #sys.stderr.write("Encoding: %s\n" % mod )
        mod = ""
    except Exception:
        enc = tiktoken.get_encoding(fallback)
        #sys.stderr.write("Warning: Model or encoding not found. Using %s.\n" % fallback)
        if check:
            # the first argument was neither model nor encoding: it belongs
            # to the text after all
            text = args[0] + " " + text
|
||
# Encode the text and report, depending on the options:
#   -tt -> the text itself    -t -> the token list
#   -v  -> the token count    (none) -> the token count and the encoding
enc_name = str(enc)
encoded_text = enc.encode_ordinary(text)
#encoded_text = enc.encode(text, disallowed_special=())

if optt:
    if optt > 1:
        print(text)
    else:
        print(encoded_text)
else:
    if optv:
        print(len(encoded_text))
    else:
        print(len(encoded_text), enc_name)
|
||
#https://github.com/openai/tiktoken/blob/main/tiktoken/core.py | ||
#https://github.com/openai/tiktoken/blob/main/tiktoken/model.py | ||
#https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb |
Oops, something went wrong.