PR: Org Repos #131 #133

Merged
merged 22 commits on Apr 23, 2024
Changes from all commits
22 commits
132a46e
borrow baseline AWS Lambda function handler code from https://github.…
nelsonic Feb 4, 2024
ba4c009
add config to deploy lambda function #128
nelsonic Feb 4, 2024
5e6c3f1
debug(context) #128
nelsonic Feb 4, 2024
3193c4d
add dpl for lambda deployment #128
nelsonic Feb 4, 2024
2d65911
add "lambda/index.js" to files_to_deploy #128
nelsonic Feb 4, 2024
279be1b
move index.js (lambda function) to root of project #128
nelsonic Feb 4, 2024
99697b3
add env2 #128
nelsonic Feb 4, 2024
cf2e7f3
remind myself how to write a lambda function ... #128
nelsonic Feb 4, 2024
adc478d
load environment variables to give S3 debugging #128
nelsonic Feb 4, 2024
7812af4
attempt to debug event ... #128
nelsonic Feb 4, 2024
72df2ca
push new version to test debugging ... #128
nelsonic Feb 4, 2024
cef3903
run gs in lambda function #128
nelsonic Mar 30, 2024
750854c
install github-scraper as dependency #128
nelsonic Mar 30, 2024
86d362e
require github-scraper in lambda function #128
nelsonic Mar 30, 2024
3e818c3
return scraped data instead of event in lambda #128
nelsonic Mar 30, 2024
f28d061
log rawPath #128
nelsonic Mar 30, 2024
b8ae814
use event.rawPath as github-scraper url #128
nelsonic Mar 30, 2024
c2bb505
create lib/next_page_beta.js for detecting next page on the Org Beta …
nelsonic Apr 23, 2024
7587bec
create lib/org_repos.js for #131 (basic working!)
nelsonic Apr 23, 2024
b79a4e9
Org Repos (Beta) parser working #131
nelsonic Apr 23, 2024
e612c1c
disable contributions as client side rendered #132
nelsonic Apr 23, 2024
7fd29ad
/* istanbul ignore else */ for next_page_beta #131
nelsonic Apr 23, 2024
3 changes: 3 additions & 0 deletions .gitignore
@@ -29,3 +29,6 @@ node_modules
.vagrant
crawl.js
.DS_Store

.env
tmp/
18 changes: 18 additions & 0 deletions index.js
@@ -0,0 +1,18 @@
require("env2")(".env");
const debug = require("./lambda/debug.js");
const gs = require('github-scraper');

exports.handler = function handler (event, context, callback) {
console.log(event);
console.log("Hi Friends!")
debug(event);
console.log('rawPath:', event.rawPath)

const url = event.rawPath;
gs(url, function(err, data) {
console.log(' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ')
console.log(data);

return callback(null, data);
});
}
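
For context, the handler above can be exercised locally with a hand-rolled event. A minimal sketch (not part of the PR): the file name test-handler.js and the rawPath value are made up, and it assumes a .env file is present so that require("env2")(".env") can load it.

// test-handler.js (hypothetical): invoke the Lambda handler with a fake Function URL event
const { handler } = require('./index.js');

// only `rawPath` is read by the handler; everything else is omitted
const event = { rawPath: '/dwyl' };

handler(event, {}, function (err, data) {
  if (err) { return console.error(err); }
  console.log(data); // the scraped data for github.com/dwyl
});
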
24 changes: 24 additions & 0 deletions lambda/debug.js
@@ -0,0 +1,24 @@
'use strict';
require('env2')('.env');
const save = require('./s3.js').save;

/**
* `debug` is used to debug SNS notification events.
* It only gets executed if the NODE_ENV is set to "test".
* To save event data to S3 you will need to add AWS_S3_BUCKET to .env
* see: github.com/dwyl/aws-ses-lambda/issues/12
* @param {Object} event - the object we want to store on S3
*/
module.exports = function debug (event) {
// console.log("process.env.NODE_ENV:", process.env.NODE_ENV);
if (process.env.NODE_ENV === "test") {
if(event.Records && !event.key) {
event.key = "sns";
}
save(event, function callback (error, data) {
console.log("DEBUG - - - error:", error, " - - - data:");
console.log(data);
console.log(" - - - - - - - - - - - - - - - - - - - - ");
});
}
};
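
A quick usage sketch (not in the PR), assuming NODE_ENV=test and AWS_S3_BUCKET are defined in .env; with those set, the event below is uploaded via lambda/s3.js:

// hypothetical SNS-style event: because it has Records and no key,
// debug sets event.key = "sns" and it lands in the bucket as "sns.json"
const debug = require('./lambda/debug.js');
debug({ Records: [ { Sns: { Message: 'hello' } } ] });
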
39 changes: 39 additions & 0 deletions lambda/http_request.js
@@ -0,0 +1,39 @@
'use strict';

require("env2")(".env"); // ensure JWT_SECRET environment variable is defined.
const http = require('https'); // ALWAYS use TLS over the internets!
const jwt = require('jsonwebtoken');
/**
* simple_http_request is a bare-bones http request using node.js core http
* see: https://nodejs.org/api/http.html#http_http_request_options_callback
* @param {Object} json - the JSON data we want to send to the Phoenix App.
* @param {Function} callback - a standard callback with error & response args
* response is a JSON Object unless there is an error. No error handling yet ...
*/

module.exports = function simple_http_request (json, callback) {
const options = { // the json data is included in the token! 😮
headers: {
'Authorization': jwt.sign(json, process.env.JWT_SECRET),
'Accept': 'application/json'
},
hostname: process.env.EMAIL_APP_URL, // e.g: phemail.herokuapp.com
method: 'POST', // HTTP post sans body: stackoverflow.com/questions/4191593
port: '443',
path: '/api/sns' // the API endpoint that processes and stores SNS data
}

http.request(options, function (res) {
let resStr = '';
res.setEncoding('utf8');
res.on('data', function (chunk) {
resStr += chunk;
}).on('end', function () {
return callback(res.statusCode, JSON.parse(resStr));
});
})
// .on('error', (e) => {
// console.error(`problem with request: ${e.message}`);
// })
.end();
};
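
A usage sketch for the helper above (hypothetical payload; assumes JWT_SECRET and EMAIL_APP_URL are set in .env and the receiving Phoenix app exposes POST /api/sns):

const http_request = require('./lambda/http_request.js');

// the JSON payload is signed into the Authorization header as a JWT, not sent as a body
http_request({ hello: 'world' }, function (statusCode, response) {
  console.log('status:', statusCode);  // e.g. 200
  console.log('response:', response);  // parsed JSON returned by the Phoenix app
});
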
52 changes: 52 additions & 0 deletions lambda/s3.js
@@ -0,0 +1,52 @@
'use strict';
require('env2')('.env');
const AWS = require('aws-sdk');
AWS.config.region = 'eu-west-1';
var s3 = new AWS.S3({params: {Bucket: process.env.AWS_S3_BUCKET}});

/**
* `save` saves a JSON object to S3.
* if you need to specify the file name, use `json.key`
* @param {Object} json - the object we want to store on S3
* @param {Function} callback - called once the file has been uploaded
*/
module.exports.save = function save (json, callback) {
if (json) {
const filename = json.key || 'event'
const params = {
Key: filename + '.json',
Body: JSON.stringify(json),
ContentType: 'application/json',
ACL: 'public-read'
};

s3.upload(params, function (err, data) {
if (callback && typeof callback === "function") {
return callback(err, data);
}
else {
return data;
}
});

} else {
return callback('ERROR: please provide json data');
}
}

/**
* `get` retrieves and parses a JSON file from S3
* this function is only used to test the `save` method.
* @param {String} key - the filename of the object to get from S3
* @param {Function} callback - called once the file has been retrieved
*/
module.exports.get = function get (key, callback) {
s3.getObject({Key: key}, function (error, data) {
if (error) {
return callback(error);
}
else {
return callback(error, JSON.parse(data.Body.toString()));
}
});
};
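
For reference, a minimal sketch of how save and get pair up (hypothetical object; assumes AWS credentials and AWS_S3_BUCKET are available via .env):

const s3 = require('./lambda/s3.js');

s3.save({ key: 'example', hello: 'world' }, function (err, data) {
  if (err) { return console.error(err); }
  console.log('uploaded to:', data.Location); // URL of example.json in the bucket
  // read it back; `get` expects the full object key, including the .json extension
  s3.get('example.json', function (getErr, json) {
    console.log(getErr, json); // null { key: 'example', hello: 'world' }
  });
});
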
17 changes: 17 additions & 0 deletions lib/next_page_beta.js
@@ -0,0 +1,17 @@
/**
* next_page checks for pagination on a "beta" page ref #131
* @param {Object} $ - cheerio object with DOM of page to be scraped
* @param {Object} data - the data we have scraped from the page so far
* @return {Object} the data object with a next_page key & value
*/
module.exports = function next_page_beta ($, data) {
const next = $('.TablePaginationSteps').find('[class^="Pagination__Page-"]').last().attr('href');
data.next_page = '';
/* istanbul ignore else */
if (next) {
const url = data.url.split('?')[0];
data.next_page = url + '?type=all&' + 'page=' + next.replace('#', '');
}

return data;
}
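
To illustrate the contract, a sketch with made-up markup that matches the selectors above (not actual GitHub HTML):

const cheerio = require('cheerio');
const next_page_beta = require('./lib/next_page_beta.js');

// hypothetical pagination markup: the last page link's href carries the next page number
const html = '<nav class="TablePaginationSteps">'
  + '<a class="Pagination__Page-abc" href="#2">2</a></nav>';
const $ = cheerio.load(html);

let data = { url: 'https://github.com/orgs/dwyl/repositories?type=all' };
data = next_page_beta($, data);
console.log(data.next_page); // https://github.com/orgs/dwyl/repositories?type=all&page=2
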
2 changes: 1 addition & 1 deletion lib/org.js
@@ -38,7 +38,7 @@ function org($, url, callback) {
name: $(parent + ' a').first().text().trim(),
lang: $(parent + 'span[itemprop=programmingLanguage]').first().text().trim(),
url: $(parent + ' a').first().attr('href'),
description: $(parent + 'p.d-inline-block').first().text().trim(),
description: $(parent + 'p[itemprop=description]').first().text().trim(),
updated: $(parent + ' relative-time')[0].attribs.datetime
});
});
46 changes: 46 additions & 0 deletions lib/org_repos.js
@@ -0,0 +1,46 @@
/**
* `org_repos` parses a given GitHub organization repositories page.
* e.g: https://github.com/orgs/dwyl/repositories?type=all
* @param {object} $ - the cheerio DOM object.
* @param {string} url - the url of the page to be parsed.
* @param {function} callback - the callback we should call after scraping
* a callback passed into this method should accept two parameters:
* @param {object} error - an error object (set to null if no error occurred)
* @param {object} data - the complete organisation data
*/
function org_repos($, url, callback) {
var data = { url: url, type: 'org_repos' };
data.name = $('h1.lh-condensed').first().text().trim();
// data.description = $('h1.lh-condensed').parent().next().text().trim(); // yep ...¯\_(ツ)_/¯
data.description = $('.container-xl .color-fg-muted').first().text().trim()
// var people = $('.Counter').eq(1); // people is *second* in list of tabs!
// data.pcount = parseInt(people.first().text(), 10);
// data.pcount = isNaN(data.pcount) ? 0 : data.pcount
data.avatar = $('.avatar')[0].attribs.src;
var parts = data.avatar.split('/');
data.uid = parseInt(parts[parts.length-1].split('?')[0], 10);
// list of repos
var items = $('li.listviewitem');
// console.log('items.length', items.length);
data.entries = []; // avoid having circular reference objects! :-(
items.each( function (i) { // JS counters start at 0.
// console.log(i)
var parent = 'li:nth-child(' + (i+1) +') '; // CSS selectors start at 1.
console.log($(parent))
console.log($(parent + ' .markdown-title'))
data.entries.push({
// feel free to add more attributes to this! 🙏
name: $(parent + ' .markdown-title').text().trim(),
// lang: $(parent + ' .listview-item-main-content').find('[class^="Text-"]').text().trim(),
url: $(parent + ' a').first().attr('href'),
description: $(parent + ' .repos-list-description').first().text().trim(),
// updated: $(parent + ' relative-time')[0].attribs.datetime
});
});
// console.log(data)

data = require('./next_page_beta')($, data); // don't worry this gets cached ;-)
callback(null, data);
}

module.exports = org_repos
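
End to end, the new parser is reached through the usual github-scraper entry point. A rough sketch, assuming the full URL form from the docblock example above is passed in:

const gs = require('github-scraper');

gs('https://github.com/orgs/dwyl/repositories?type=all', function (err, data) {
  if (err) { return console.error(err); }
  console.log(data.type);           // 'org_repos'
  console.log(data.entries.length); // number of repos listed on this page
  console.log(data.next_page);      // url of the next page, or '' if there is none
});
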
10 changes: 5 additions & 5 deletions lib/profile.js
@@ -1,5 +1,5 @@

const selectors=require('../config/repos')
const selectors = require('../config/repos')
/**
* profile method scrapes a given GitHub user profile
* @param {string} username - a valid GitHub username
@@ -46,11 +46,11 @@ module.exports = function profile ($, url, callback) {
data.website = $('[data-test-selector=profile-website-url] > a').attr("href")
// data.joined = $('.join-date').attr('datetime'); // Joined GitHub

// Contributions to Open Source in the past 12 months
data.contribs = parseInt($('.js-yearly-contributions').text().trim()
.split(' contributions')[0].replace(',', ''), 10);
// Contributions to Open Source in the past 12 months #132
// data.contribs = parseInt($('.js-yearly-contributions h2').text().trim()
// .split(' contributions')[0].replace(',', ''), 10);
// Contribution Matrix
data = require('./profile_contribs.js')($, data);
// data = require('./profile_contribs.js')($, data);

// List of (Public) organizations from profile
// data-hovercard-type="organization"
1 change: 1 addition & 0 deletions lib/scrapers.js
@@ -7,6 +7,7 @@ module.exports = {
// labels : require('./labels'),
// milestones : require('./milestones'),
org: require('./org'),
org_repos: require('./org_repos'),
people: require('./people'),
profile: require('./profile'),
repo: require('./repo'),
4 changes: 4 additions & 0 deletions lib/switcher.js
@@ -55,6 +55,10 @@ module.exports = function switcher (url, callback) {
console.log('repos_user - - - - - - - - -')
scraper = 'repos_user';
}
// e.g: https://github.com/orgs/dwyl/repositories?type=all
else if(url.match(/org/) && url.match(/repositories/)) {
scraper = 'org_repos';
}
else if(url.match(/followers|following/)) {
scraper = 'followers'; // html/DOM is identical for these 2 pages!
}
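
In isolation, the new branch routes an org repositories URL like this (a standalone illustration of the regex check, not how switcher is normally called):

const url = 'https://github.com/orgs/dwyl/repositories?type=all';
if (url.match(/org/) && url.match(/repositories/)) {
  console.log('scraper:', 'org_repos'); // this URL is now handled by lib/org_repos.js
}
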
3 changes: 0 additions & 3 deletions lib/utils.js
@@ -19,9 +19,6 @@ function parse_int (str) {
, 10)
}

/**
* A library of utility functions for parsing web data.
*/
module.exports = {
parse_int: parse_int
}