Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
ewdurbin committed Mar 1, 2020
0 parents commit f9cffa2
Show file tree
Hide file tree
Showing 10 changed files with 923 additions and 0 deletions.
11 changes: 11 additions & 0 deletions linehaul/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
11 changes: 11 additions & 0 deletions linehaul/events/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
235 changes: 235 additions & 0 deletions linehaul/events/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import enum
import logging
import posixpath

from typing import Optional

import arrow
import attr
import attr.validators
import cattr

from pyparsing import Literal as L, Word, Optional as OptionalItem
from pyparsing import printables as _printables, restOfLine
from pyparsing import ParseException

from linehaul.ua import UserAgent, parser as user_agents


# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


def _structure_timestamp(raw, _cls):
    """Build an ``arrow.Arrow`` from a raw timestamp string.

    The first five and the last four characters of ``raw`` are dropped
    before parsing.  Presumably these are a weekday prefix such as
    ``"Mon, "`` and a zone suffix such as ``" GMT"`` -- TODO confirm against
    whatever produces the log lines.
    """
    return arrow.get(raw[5:-4], "DD MMM YYYY HH:mm:ss")


# Converter used by this module to structure plain dicts into attrs classes;
# it needs a custom hook because timestamps arrive as strings.
_cattr = cattr.Converter()
_cattr.register_structure_hook(arrow.Arrow, _structure_timestamp)


class UnparseableEvent(Exception):
    """Raised when a raw message cannot be parsed into an event."""


class NullValue:
    """Type of the sentinel that stands in for a literal ``(null)`` field."""


# Rebind the name to one shared instance.  From here on ``NullValue`` refers
# to that instance (not the class), so it can be compared with ``is``.
NullValue = NullValue()


# Characters allowed inside a single field: pyparsing's printables plus
# space and tab, minus the two delimiter characters "|" and "@".
printables = "".join(set(_printables + " " + "\t") - {"|", "@"})

# Field separator; suppressed so it never shows up in the parse results.
PIPE = L("|").suppress()

# Separator that follows a numeric version header; also suppressed.
AT = L("@").suppress()

# A literal "(null)" field is replaced by the NullValue sentinel.
NULL = L("(null)")
NULL.setParseAction(lambda s, l, t: NullValue)

# For each field below, the element returned by setResultsName() is rebound
# to the same name, and setName() then labels that rebound element.
TIMESTAMP = Word(printables)
TIMESTAMP = TIMESTAMP.setResultsName("timestamp")
TIMESTAMP.setName("Timestamp")

COUNTRY_CODE = Word(printables)
COUNTRY_CODE = COUNTRY_CODE.setResultsName("country_code")
COUNTRY_CODE.setName("Country Code")

URL = Word(printables)
URL = URL.setResultsName("url")
URL.setName("URL")

# timestamp | [country_code] | url  -- the country code may be empty.
REQUEST = TIMESTAMP + PIPE + OptionalItem(COUNTRY_CODE) + PIPE + URL

# Each project field is either the literal "(null)" or free text.  NULL is
# listed first in the alternation, so it is tried before Word(printables).
PROJECT_NAME = NULL | Word(printables)
PROJECT_NAME = PROJECT_NAME.setResultsName("project_name")
PROJECT_NAME.setName("Project Name")

VERSION = NULL | Word(printables)
VERSION = VERSION.setResultsName("version")
VERSION.setName("Version")

# Package type is "(null)" or one of a fixed set of literals; these are the
# same strings used as values of the PackageType enum.
PACKAGE_TYPE = NULL | (
L("sdist")
| L("bdist_wheel")
| L("bdist_dmg")
| L("bdist_dumb")
| L("bdist_egg")
| L("bdist_msi")
| L("bdist_rpm")
| L("bdist_wininst")
)
PACKAGE_TYPE = PACKAGE_TYPE.setResultsName("package_type")
PACKAGE_TYPE.setName("Package Type")

# project_name | version | package_type
PROJECT = PROJECT_NAME + PIPE + VERSION + PIPE + PACKAGE_TYPE

# Both TLS fields are either the literal "(null)" or free text.
TLS_PROTOCOL = NULL | Word(printables)
TLS_PROTOCOL = TLS_PROTOCOL.setResultsName("tls_protocol")
TLS_PROTOCOL.setName("TLS Protocol")

TLS_CIPHER = NULL | Word(printables)
TLS_CIPHER = TLS_CIPHER.setResultsName("tls_cipher")
TLS_CIPHER.setName("TLS Cipher")

# tls_protocol | tls_cipher
TLS = TLS_PROTOCOL + PIPE + TLS_CIPHER

# The user agent is whatever remains on the line after the last separator.
USER_AGENT = restOfLine
USER_AGENT = USER_AGENT.setResultsName("user_agent")
USER_AGENT.setName("UserAgent")

# v1: an optional "1@" prefix, and no TLS fields.
V1_HEADER = OptionalItem(L("1").suppress() + AT)

MESSAGE_v1 = V1_HEADER + REQUEST + PIPE + PROJECT + PIPE + USER_AGENT
MESSAGE_v1.leaveWhitespace()

# v2: a mandatory "2@" prefix, with TLS fields between request and project.
V2_HEADER = L("2").suppress() + AT

MESSAGE_v2 = V2_HEADER + REQUEST + PIPE + TLS + PIPE + PROJECT + PIPE + USER_AGENT
MESSAGE_v2.leaveWhitespace()

# v3: a "download" header field.  The header is not suppressed, so it is the
# first token of the parse results.
# NOTE(review): unlike v1/v2, leaveWhitespace() is not called on MESSAGE_v3
# or MESSAGE_SIMPLE -- confirm this is intentional.
V3_HEADER = L("download")
MESSAGE_v3 = V3_HEADER + PIPE + REQUEST + PIPE + TLS + PIPE + PROJECT + PIPE + USER_AGENT

# simple: a "simple" header field (also kept in the results).  The four
# consecutive separators after TLS leave three empty fields in the positions
# where the download layouts carry the PROJECT fields.
SIMPLE_HEADER = L("simple")
MESSAGE_SIMPLE = SIMPLE_HEADER + PIPE + REQUEST + PIPE + TLS + PIPE + PIPE + PIPE + PIPE + USER_AGENT

# Layouts are tried in this order; the first one that matches wins.
MESSAGE = MESSAGE_SIMPLE | MESSAGE_v3 | MESSAGE_v2 | MESSAGE_v1


@enum.unique
class PackageType(enum.Enum):
    """Closed set of package types; each member's value equals its name."""

    # Built distributions.
    bdist_dmg = "bdist_dmg"
    bdist_dumb = "bdist_dumb"
    bdist_egg = "bdist_egg"
    bdist_msi = "bdist_msi"
    bdist_rpm = "bdist_rpm"
    bdist_wheel = "bdist_wheel"
    bdist_wininst = "bdist_wininst"

    # Source distribution.
    sdist = "sdist"


@attr.s(slots=True, frozen=True)
class File:
    """Immutable description of the file referenced by a download event.

    All four attributes are required.  ``filename``, ``project`` and
    ``version`` are validated to be ``str`` instances.
    """

    # Validated string attributes.
    filename = attr.ib(validator=attr.validators.instance_of(str))
    project = attr.ib(validator=attr.validators.instance_of(str))
    version = attr.ib(validator=attr.validators.instance_of(str))

    # Declared as a PackageType; no validator is attached to this attribute.
    type = attr.ib(type=PackageType)


@attr.s(slots=True, frozen=True)
class Download:
    """Immutable record of one download event.

    ``timestamp``, ``url`` and ``file`` are required; every other attribute
    defaults to ``None``.
    """

    # Required attributes.
    timestamp = attr.ib(type=arrow.Arrow)
    url = attr.ib(validator=attr.validators.instance_of(str))
    file = attr.ib(type=File)

    # Optional attributes: each must be a ``str`` whenever it is not ``None``.
    tls_protocol = attr.ib(
        validator=attr.validators.optional(attr.validators.instance_of(str)),
        default=None,
    )
    tls_cipher = attr.ib(
        validator=attr.validators.optional(attr.validators.instance_of(str)),
        default=None,
    )
    country_code = attr.ib(
        validator=attr.validators.optional(attr.validators.instance_of(str)),
        default=None,
    )

    # User agent details, when available.
    details = attr.ib(default=None, type=Optional[UserAgent])


@attr.s(slots=True, frozen=True)
class Simple:
    """Immutable record of one ``simple`` event.

    ``timestamp``, ``url`` and ``project`` are required; every other
    attribute defaults to ``None``.
    """

    # Required attributes.
    timestamp = attr.ib(type=arrow.Arrow)
    url = attr.ib(validator=attr.validators.instance_of(str))
    project = attr.ib(validator=attr.validators.instance_of(str))

    # Optional attributes: each must be a ``str`` whenever it is not ``None``.
    tls_protocol = attr.ib(
        validator=attr.validators.optional(attr.validators.instance_of(str)),
        default=None,
    )
    tls_cipher = attr.ib(
        validator=attr.validators.optional(attr.validators.instance_of(str)),
        default=None,
    )
    country_code = attr.ib(
        validator=attr.validators.optional(attr.validators.instance_of(str)),
        default=None,
    )

    # User agent details, when available.
    details = attr.ib(default=None, type=Optional[UserAgent])


def _value_or_none(value):
    """Return ``None`` for a missing field, otherwise return ``value`` as is.

    A field counts as missing when it is the ``NullValue`` sentinel or
    compares equal to the empty string.
    """
    is_missing = value is NullValue or value == ""
    return None if is_missing else value


def parse(message):
    """Parse one raw event line into a ``Download`` or ``Simple`` record.

    Args:
        message: A single pipe-delimited event line, in any of the layouts
            accepted by ``MESSAGE``.

    Returns:
        A ``Simple`` instance when the line starts with the ``simple``
        header, otherwise a ``Download`` instance.  ``details`` holds the
        parsed user agent when it is recognised, and stays ``None`` when the
        user agent is unknown.  ``None`` is returned instead of a record
        when the user agent parser returns ``None`` (an ignored agent).

    Raises:
        UnparseableEvent: If the line does not match the grammar, or if a
            ``simple`` line's URL has no project path segment.
    """
    try:
        parsed = MESSAGE.parseString(message, parseAll=True)
    except ParseException as exc:
        raise UnparseableEvent("{!r} {}".format(message, exc)) from None

    # Fields shared by both record types.
    data = {
        "timestamp": parsed.timestamp,
        "tls_protocol": _value_or_none(parsed.tls_protocol),
        "tls_cipher": _value_or_none(parsed.tls_cipher),
        "country_code": _value_or_none(parsed.country_code),
        "url": parsed.url,
    }

    if parsed[0] == "simple":
        # The project name is taken from the third "/"-separated segment of
        # the URL.  Report a URL that is too short as an unparseable event
        # rather than letting a bare IndexError escape.
        segments = parsed.url.split("/")
        if len(segments) < 3:
            raise UnparseableEvent(
                "{!r} simple URL has no project segment".format(message)
            )
        data["project"] = segments[2]
        result = _cattr.structure(data, Simple)
    else:
        # Every other layout (with or without the "download" header) is a
        # download event; only these carry file information.
        data["file"] = {
            "filename": posixpath.basename(parsed.url),
            "project": _value_or_none(parsed.project_name),
            "version": _value_or_none(parsed.version),
            "type": _value_or_none(parsed.package_type),
        }
        result = _cattr.structure(data, Download)

    try:
        ua = user_agents.parse(parsed.user_agent)
    except user_agents.UnknownUserAgentError:
        # Keep the event, just without user agent details.  Report through
        # the module logger instead of writing to stdout.
        logger.warning("Unknown User agent: %s", parsed.user_agent)
        return result

    if ua is None:
        # Ignored user agents mean we'll skip trying to log this event.
        return None

    return attr.evolve(result, details=ua)
19 changes: 19 additions & 0 deletions linehaul/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging as _logging


# Numeric value of the custom "SPEW" level; it is lower than the standard
# library's DEBUG level (10).
SPEW = 5

# Register the name so the level is shown as "SPEW" instead of "Level 5".
_logging.addLevelName(level=SPEW, levelName="SPEW")
16 changes: 16 additions & 0 deletions linehaul/ua/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from linehaul.ua.datastructures import UserAgent


# Public names of this package: only UserAgent is exported.
__all__ = ["UserAgent"]
66 changes: 66 additions & 0 deletions linehaul/ua/datastructures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

import attr


@attr.s(slots=True, frozen=True)
class Installer:
    """Immutable name/version pair for an installer.

    Both attributes are optional and default to ``None``.  No validators
    are attached to them.
    """

    name = attr.ib(default=None, type=Optional[str])
    version = attr.ib(default=None, type=Optional[str])


@attr.s(slots=True, frozen=True)
class Implementation:
    """Immutable name/version pair for an implementation.

    Both attributes are optional and default to ``None``.  No validators
    are attached to them.
    """

    name = attr.ib(default=None, type=Optional[str])
    version = attr.ib(default=None, type=Optional[str])


@attr.s(slots=True, frozen=True)
class LibC:
    """Immutable lib/version pair describing a libc.

    Both attributes are optional and default to ``None``.  No validators
    are attached to them.
    """

    lib = attr.ib(default=None, type=Optional[str])
    version = attr.ib(default=None, type=Optional[str])


@attr.s(slots=True, frozen=True)
class Distro:
    """Immutable description of a distro.

    Every attribute is optional and defaults to ``None``.  No validators
    are attached to them.
    """

    # Plain string attributes.
    name = attr.ib(default=None, type=Optional[str])
    version = attr.ib(default=None, type=Optional[str])
    id = attr.ib(default=None, type=Optional[str])

    # Nested libc description.
    libc = attr.ib(default=None, type=Optional[LibC])


@attr.s(slots=True, frozen=True)
class System:
    """Immutable name/release pair for a system.

    Both attributes are optional and default to ``None``.  No validators
    are attached to them.
    """

    name = attr.ib(default=None, type=Optional[str])
    release = attr.ib(default=None, type=Optional[str])


@attr.s(slots=True, frozen=True)
class UserAgent:
    """Immutable, structured form of a user agent.

    Every attribute is optional and defaults to ``None``.  No validators
    are attached to them.
    """

    # Nested records.
    installer = attr.ib(default=None, type=Optional[Installer])
    python = attr.ib(default=None, type=Optional[str])
    implementation = attr.ib(default=None, type=Optional[Implementation])
    distro = attr.ib(default=None, type=Optional[Distro])
    system = attr.ib(default=None, type=Optional[System])

    # Scalar attributes.
    cpu = attr.ib(default=None, type=Optional[str])
    openssl_version = attr.ib(default=None, type=Optional[str])
    setuptools_version = attr.ib(default=None, type=Optional[str])
    ci = attr.ib(default=None, type=Optional[bool])
Loading

0 comments on commit f9cffa2

Please sign in to comment.