File tree 1 file changed +39
-0
lines changed
1 file changed +39
-0
lines changed Original file line number Diff line number Diff line change
1
+ from lxml import etree
2
+
3
+
4
class XMLPruning:
    """Remove duplicate elements (by ``id`` attribute) from an XML document.

    The document is parsed once when the object is created. After pruning,
    the tree is written to a sibling file whose name is the input file name
    prefixed with ``new-``.
    """

    def __init__(self, xml_document_location):
        """Parse the document and keep a reference to its root element.

        Args:
            xml_document_location: Path of the XML file to load. The same
                value is used to derive the output file name.
        """
        self.xml_document_location = xml_document_location
        self.root = self.get_root()

    def get_root(self) -> "etree._Element":
        """Parse ``self.xml_document_location`` and return its root element."""
        tree = etree.parse(self.xml_document_location)
        return tree.getroot()

    def remove_duplicates_by_id(self, element_type):
        """Detach every ``element_type`` element whose ``id`` was already seen.

        The first element carrying a given ``id`` is kept; later elements
        with the same ``id`` are removed from their parent. Elements without
        an ``id`` attribute are left untouched. The tree is then written out
        with :meth:`write_to_file`.

        Args:
            element_type: Tag name handed to ``root.iter`` to select the
                candidate elements.

        Returns:
            The set of distinct ``id`` values that were encountered.
        """
        visited = set()

        # Take a snapshot of the matches before touching the tree, so that
        # detaching nodes cannot disturb the traversal of the live tree.
        for element in list(self.root.iter(element_type)):
            if 'id' in element.attrib:
                self.check_visited_id(element, visited)

        self.write_to_file()
        return visited

    def check_visited_id(self, element, visited):
        """Record the element's ``id``, or remove the element if it is known.

        Args:
            element: Element to inspect; its ``id`` attribute is read.
            visited: Set of ids seen so far. It is updated in place when the
                id is new.
        """
        current = element.get('id')
        if current not in visited:
            visited.add(current)
            return

        print(f"Removing element with id {current}")
        element.getparent().remove(element)

    def _output_path(self):
        """Return the input path with ``new-`` prefixed to the file name only."""
        # Local import so the class does not depend on a module-level import.
        import os

        directory, filename = os.path.split(self.xml_document_location)
        # Prefixing the whole location would corrupt any directory component
        # ("data/x.xml" -> "new-data/x.xml"); prefix just the file name.
        return os.path.join(directory, "new-" + filename)

    def write_to_file(self):
        """Write the current tree, pretty-printed, to the ``new-`` file."""
        with open(self._output_path(), 'wb') as doc:
            doc.write(etree.tostring(self.root, pretty_print=True))
34
+
35
+
36
if __name__ == "__main__":
    # Usage example: prune duplicated "TransportStation" elements and print
    # how many distinct ids were encountered.
    pruner = XMLPruning("statii-ratt-format.xml")
    unique_ids = pruner.remove_duplicates_by_id("TransportStation")
    print(len(unique_ids))
You can’t perform that action at this time.
0 commit comments