diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..ab666cc --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,34 @@ +## Description of the change + +> Please include a summary of the change and which issues are fixed. +> Please also include relevant motivation and context. + +## Type of change + +- [ ] Bug fix (non-breaking change that fixes an issue) +- [ ] New feature (non-breaking change that adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] Maintenance +- [ ] New release + +## Related issues + +> ClubHouse stories and GitHub issues (delete irrelevant) + +- Fix [ch] +- Fix #1 + +## Checklists + +### Development + +- [ ] Lint rules pass locally +- [ ] The code changed/added as part of this pull request has been covered with tests +- [ ] All tests related to the changed code pass in development + +### Code review + +- [ ] This pull request has a descriptive title and information useful to a reviewer. There may be a screenshot or screencast attached +- [ ] "Ready for review" label attached to the PR and reviewers assigned +- [ ] Issue from task tracker has a link to this pull request +- [ ] Changes have been reviewed by at least one other engineer diff --git a/README.md b/README.md index 68b1b12..5734588 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,35 @@ +Rollbar Info +============ + +This is a private clone of https://github.com/mattrobenolt/python-sourcemap +so we can build package distributions from it pinned to the version we use. + +As of 2022-12-02, there should be a version on our Python private registry built and ready to use: +https://pypi.rollbar.tools + +This corresponds to the commit pinned as a dependency for mox at the time of writing: `05735efbd5c8cdcaff0c2ca3b341dafc3d1dbadb` +And it's marked as `0.3.0+05735ef`. 
+ +It has been built following this procedure, which can be replicated if we ever need to switch to a more recent version or push a patch to it. +You'll need the username and password of the registry's write user to publish. +The credentials are on LastPass, under the `devpi rollbar user` entry. + +Export these environment variables to use with twine later: + +``` +export TWINE_USERNAME=rollbar +export TWINE_PASSWORD=...... # get it from LastPass +``` + +Now: + +- check out the repo to the commit or tag you need +- modify `sourcemap/__init__.py` to reflect the new version. Always + append the `+HASH` to the version in order not to conflict with upstream. +- launch a shell into a docker image with Python 2.7 support (the password is the TWINE_PASSWORD value exported above): + `docker run -ti --rm -v $(pwd):/app -e TWINE_USERNAME=$TWINE_USERNAME -e TWINE_PASSWORD=$TWINE_PASSWORD cimg/python:2.7 /bin/bash` +- launch the included `./tools/build_and_publish.sh` script + # SourceMap [![Build Status](https://travis-ci.org/mattrobenolt/python-sourcemap.png?branch=master)](https://travis-ci.org/mattrobenolt/python-sourcemap) Parse JavaScript source maps. 
diff --git a/sourcemap/__init__.py b/sourcemap/__init__.py index b40ac40..208fd2e 100644 --- a/sourcemap/__init__.py +++ b/sourcemap/__init__.py @@ -8,7 +8,7 @@ from .exceptions import SourceMapDecodeError # NOQA from .decoder import SourceMapDecoder -__version__ = '0.2.1' +__version__ = '0.3.1' def load(fp, cls=None): diff --git a/sourcemap/decoder.py b/sourcemap/decoder.py index 694ec48..331cd53 100644 --- a/sourcemap/decoder.py +++ b/sourcemap/decoder.py @@ -13,8 +13,8 @@ import os import sys from functools import partial -from .exceptions import SourceMapDecodeError -from .objects import Token, SourceMapIndex +from .exceptions import SourceMapDecodeError, SourceMapTypeError +from .objects import Token, SourceMapIndex, SectionedSourceMapIndex try: import simplejson as json except ImportError: @@ -63,8 +63,10 @@ def parse_vlq(self, segment): return values def decode(self, source): - """Decode a source map object into a SourceMapIndex. + """Decode a source map object into a SourceMapIndex or + SectionedSourceMapIndex. + + For SourceMapIndex: The index is keyed on (dst_line, dst_column) for lookups, and a per row index is kept to help calculate which Token to retrieve. @@ -102,6 +104,29 @@ def decode(self, source): lte to the bisect_right: 2-1 => row[2-1] => 12 - At this point, we know the token location, (1, 12) - Pull (1, 12) from index => tokens[3] + + For SectionedSourceMapIndex: + The offsets are stored as tuples in sorted order: + [(0, 0), (1, 10), (1, 24), (2, 0), ...] + + For each offset there is a corresponding SourceMapIndex + which operates as described above, except the tokens + are relative to their own section and must have the offset + applied in reverse on the destination row/col when the tokens + are returned. 
+ + To find the token at (1, 20): + - bisect_right to find the closest index (1, 20) + - Supposing that returns index i, we actually want (i - 1) + because the token we want is inside the map before that one + - We then have a SourceMapIndex and we perform the search + for (1 - offset[0], column - offset[1]). [Note this isn't + exactly correct as we have to account for different lines + being searched for and the found offset, so for the column + we use either offset[1] or 0 depending on if line matches + offset[0] or not] + - The token we find we then translate dst_line += offset[0], + and dst_col += offset[1]. """ # According to spec (https://docs.google.com/document/d/1U1RGAehQwRypUTovF1KRlpiOFze0b-_2gc6fAH0KY0k/edit#heading=h.h7yy76c5il9v) # A SouceMap may be prepended with ")]}'" to cause a Javascript error. @@ -110,7 +135,22 @@ def decode(self, source): source = source.split('\n', 1)[1] smap = json.loads(source) + if smap.get('sections'): + offsets = [] + maps = [] + for section in smap.get('sections'): + offset = section.get('offset') + offsets.append((offset.get('line'), offset.get('column'))) + maps.append(self._decode_map(section.get('map'))) + return SectionedSourceMapIndex(smap, offsets, maps) + else: + return self._decode_map(smap) + + def _decode_map(self, smap): sources = smap['sources'] + if not all(isinstance(item, str) for item in sources): + raise SourceMapTypeError("Sources must be a list of strings") + sourceRoot = smap.get('sourceRoot') names = list(map(text_type, smap['names'])) mappings = smap['mappings'] @@ -178,9 +218,10 @@ def decode(self, source): assert src_line >= 0, ('src_line', src_line) assert src_col >= 0, ('src_col', src_col) except AssertionError as e: + error_info = e.args[0] raise SourceMapDecodeError( "Segment %s has negative %s (%d), in file %s" - % (segment, e.message[0], e.message[1], src) + % (segment, error_info[0], error_info[1], src) ) token = Token(dst_line, dst_col, src, src_line, src_col, name) diff --git 
a/sourcemap/exceptions.py b/sourcemap/exceptions.py index 9e0350c..4c75486 100644 --- a/sourcemap/exceptions.py +++ b/sourcemap/exceptions.py @@ -8,3 +8,7 @@ class SourceMapDecodeError(ValueError): "lol sourcemap error" pass + +class SourceMapTypeError(TypeError): + "invalid sourcemap due to a type error" + pass diff --git a/sourcemap/objects.py b/sourcemap/objects.py index a397dfd..4b9fe0d 100644 --- a/sourcemap/objects.py +++ b/sourcemap/objects.py @@ -61,7 +61,7 @@ def __init__(self, raw, tokens, line_index, index, sources=None): def lookup(self, line, column): try: # Let's hope for a direct match first - return self.index[(line, column)] + return self.index[(line, column)], self except KeyError: pass @@ -75,7 +75,31 @@ def lookup(self, line, column): # We actually want the one less than current column = line_index[i - 1] # Return from the main index, based on the (line, column) tuple - return self.index[(line, column)] + return self.index[(line, column)], self + + def columns_for_line(self, line): + return self.line_index[line] + + def total_number_of_lines(self): + return len(self.line_index) + + def files(self): + f = self.raw.get('file') + return [f] if f else None + + def sources_content_map(self): + result = self._source_content_array() + return dict(result) if result else None + + def raw_sources(self): + return self.raw.get('sources') + + def _source_content_array(self): + sources = self.raw.get('sources') + content = self.raw.get('sourcesContent') + if sources and content: + return zip(sources, content) + return None def __getitem__(self, item): return self.tokens[item] @@ -88,3 +112,69 @@ def __len__(self): def __repr__(self): return '' % ', '.join(map(str, self.sources)) + + +class SectionedSourceMapIndex(object): + """The index for a source map which contains sections + containing all the Tokens and precomputed indexes for + searching.""" + + def __init__(self, raw, offsets, maps): + self.raw = raw + self.offsets = offsets + self.maps = maps + + def 
lookup(self, line, column): + map_index = bisect_right(self.offsets, (line, column)) - 1 + line_offset, col_offset = self.offsets[map_index] + col_offset = 0 if line != line_offset else col_offset + smap = self.maps[map_index] + result, _ = smap.lookup(line - line_offset, column - col_offset) + result.dst_line += line_offset + result.dst_col += col_offset + return result, smap + + def columns_for_line(self, line): + last_map_index = bisect_right(self.offsets, (line + 1, 0)) + first_map_index = bisect_right(self.offsets, (line, 0)) - 1 + columns = [] + for map_index in range(first_map_index, last_map_index): + smap = self.maps[map_index] + line_offset, col_offset = self.offsets[map_index] + smap_line = line - line_offset + smap_cols = smap.columns_for_line(smap_line) + columns.extend([x + col_offset for x in smap_cols]) + return columns + + def total_number_of_lines(self): + result = 0 + for smap in self.maps: + result += smap.total_number_of_lines() + return result + + def files(self): + files = [] + for smap in self.maps: + smap_files = smap.files() + if smap_files: + files.extend(smap_files) + return files if len(files) else None + + def sources_content_map(self): + content_maps = [] + for m in self.maps: + source_content_array = m._source_content_array() + if source_content_array: + content_maps.extend(source_content_array) + if len(content_maps): + return dict(content_maps) + return None + + def raw_sources(self): + sources = [] + for m in self.maps: + sources.extend(m.raw_sources()) + return sources + + def __repr__(self): + return '' % ', '.join(map(str, self.maps)) diff --git a/tests/test_integration.py b/tests/test_integration.py index 8755076..3709446 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -65,3 +65,22 @@ def test_unicode_names(self): # This shouldn't blow up sourcemap.loads(min_map) + + def test_invalid_map(self): + with self.assertRaises( + sourcemap.SourceMapDecodeError, + msg='Segment LCnBD has negative dst_col (-5), 
in file test-invalid2.js' + ): + sourcemap.loads( + '{"version":3,"lineCount":1,"mappings":"LCnBD;",' + '"sources":["test-invalid.js","test-invalid2.js"],"names":[]}' + ) + + def test_invalid_map_type_error(self): + with self.assertRaises( + sourcemap.exceptions.SourceMapTypeError, + msg='Sources must be a list of strings' + ): + sourcemap.loads( + '{"version":3,"sources":["1", "2", 3],"names":["x","alert"],"mappings":"AAAA,GAAIA,GAAI,EACR,IAAIA,GAAK,EAAG,CACVC,MAAM"}' + ) diff --git a/tests/test_objects.py b/tests/test_objects.py index 852bf1b..723c319 100644 --- a/tests/test_objects.py +++ b/tests/test_objects.py @@ -2,7 +2,7 @@ import unittest2 as unittest except ImportError: import unittest -from sourcemap.objects import Token, SourceMapIndex +from sourcemap.objects import Token, SourceMapIndex, SectionedSourceMapIndex class TokenTestCase(unittest.TestCase): @@ -10,6 +10,138 @@ def test_eq(self): assert Token(1, 1, 'lol.js', 1, 1, 'lol') == Token(1, 1, 'lol.js', 1, 1, 'lol') assert Token(99, 1, 'lol.js', 1, 1, 'lol') != Token(1, 1, 'lol.js', 1, 1, 'lol') +class SectionedSourceMapIndexTestCase(unittest.TestCase): + def get_index(self): + offsets = [(0, 0), (1, 14), (2, 28)] + tokens0 = [ + Token(dst_line=0, dst_col=0), + Token(dst_line=0, dst_col=5), + Token(dst_line=1, dst_col=0), + Token(dst_line=1, dst_col=12), + ] + tokens1 = [ + Token(dst_line=0, dst_col=0), + Token(dst_line=0, dst_col=5), + Token(dst_line=1, dst_col=0), + Token(dst_line=1, dst_col=12), + ] + tokens2 = [ + Token(dst_line=0, dst_col=0), + Token(dst_line=0, dst_col=5), + Token(dst_line=1, dst_col=0), + Token(dst_line=1, dst_col=12), + ] + maps = [ + SourceMapIndex({'file': 'foo0.js'}, tokens0, + [ + [0, 5], + [0, 12], + ], + { + (0, 0): tokens0[0], + (0, 5): tokens0[1], + (1, 0): tokens0[2], + (1, 12): tokens0[3], + }), + SourceMapIndex({'file': 'foo1.js'}, tokens1, + [ + [0, 5], + [0, 12], + ], + { + (0, 0): tokens1[0], + (0, 5): tokens1[1], + (1, 0): tokens1[2], + (1, 12): tokens1[3], + }), 
+ SourceMapIndex({'file': 'foo2.js'}, tokens2, + [ + [0, 5], + [0, 12], + ], + { + (0, 0): tokens2[0], + (0, 5): tokens2[1], + (1, 0): tokens2[2], + (1, 12): tokens2[3], + }), + ] + + raw = {} + + return SectionedSourceMapIndex(raw, offsets, maps), [tokens0, tokens1, tokens2] + + def test_lookup(self): + index, tokens = self.get_index() + + for i in range(5): + assert index.lookup(0, i)[0] is tokens[0][0] + + for i in range(5, 10): + assert index.lookup(0, i)[0] is tokens[0][1] + + for i in range(12): + assert index.lookup(1, i)[0] is tokens[0][2] + + for i in range(12, 14): + assert index.lookup(1, i)[0] is tokens[0][3] + + for i in range(14, 19): + assert index.lookup(1, i)[0] is tokens[1][0] + + for i in range(19, 25): + assert index.lookup(1, i)[0] is tokens[1][1] + + for i in range(12): + assert index.lookup(2, i)[0] is tokens[1][2] + + for i in range(12, 28): + assert index.lookup(2, i)[0] is tokens[1][3] + + for i in range(28, 33): + assert index.lookup(2, i)[0] is tokens[2][0] + + for i in range(33, 40): + assert index.lookup(2, i)[0] is tokens[2][1] + + for i in range(12): + assert index.lookup(3, i)[0] is tokens[2][2] + + for i in range(12, 14): + assert index.lookup(3, i)[0] is tokens[2][3] + + def test_columns_for_line(self): + index, tokens = self.get_index() + cols = index.columns_for_line(0) + + assert cols[0] is tokens[0][0].dst_col + assert cols[1] is tokens[0][1].dst_col + + cols = index.columns_for_line(1) + + assert len(cols) is 4 + assert cols[0] is tokens[0][2].dst_col + assert cols[1] is tokens[0][3].dst_col + assert cols[2] is tokens[1][0].dst_col + index.offsets[1][1] + assert cols[3] is tokens[1][1].dst_col + index.offsets[1][1] + + cols = index.columns_for_line(2) + + assert len(cols) is 4 + assert cols[0] is tokens[1][2].dst_col + index.offsets[1][1] + assert cols[1] is tokens[1][3].dst_col + index.offsets[1][1] + assert cols[2] is tokens[2][0].dst_col + index.offsets[2][1] + assert cols[3] is tokens[2][1].dst_col + index.offsets[2][1] + 
+ def test_lookup_from_columns_for_line(self): + index, tokens = self.get_index() + cols = index.columns_for_line(2) + t, _ = index.lookup(2, cols[2]) + assert t is tokens[2][0] + + def test_files(self): + index, _ = self.get_index() + assert len(index.files()) is 3 class SourceMapIndexTestCase(unittest.TestCase): def get_index(self): @@ -40,16 +172,28 @@ def test_lookup(self): index, tokens = self.get_index() for i in range(5): - assert index.lookup(0, i) is tokens[0] + assert index.lookup(0, i)[0] is tokens[0] for i in range(5, 10): - assert index.lookup(0, i) is tokens[1] + assert index.lookup(0, i)[0] is tokens[1] for i in range(12): - assert index.lookup(1, i) is tokens[2] + assert index.lookup(1, i)[0] is tokens[2] for i in range(12, 20): - assert index.lookup(1, i) is tokens[3] + assert index.lookup(1, i)[0] is tokens[3] + + def test_columns_for_line(self): + index, tokens = self.get_index() + cols = index.columns_for_line(0) + + assert cols[0] is tokens[0].dst_col + assert cols[1] is tokens[1].dst_col + + cols = index.columns_for_line(1) + + assert cols[0] is tokens[2].dst_col + assert cols[1] is tokens[3].dst_col def test_getitem(self): index, tokens = self.get_index() diff --git a/tools/build_and_publish.sh b/tools/build_and_publish.sh new file mode 100755 index 0000000..73054ec --- /dev/null +++ b/tools/build_and_publish.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +set -eu + +cd /app || exit 1 + +# install the twine tool to upload the python distributions +pip install twine + +# build the distributions: +python setup.py sdist bdist_wheel + +# upload to registry +twine upload \ + --repository-url https://pypi.rollbar.tools/rollbar/rollbar/ \ + --verbose \ + dist/*