diff --git a/llm_web_kit/extractor/html/recognizer/text.py b/llm_web_kit/extractor/html/recognizer/text.py index 4f99d9e6..db90f4a7 100644 --- a/llm_web_kit/extractor/html/recognizer/text.py +++ b/llm_web_kit/extractor/html/recognizer/text.py @@ -270,7 +270,11 @@ def __get_paragraph_text_recusive(el: HtmlElement, text: str) -> str: para_text.append({'c': final, 't': ParagraphTextType.TEXT}) for item in para_text: - item['c'] = restore_sub_sup_from_text_regex(item['c']).replace('$br$', PARAGRAPH_SEPARATOR) + if item['c'] is not None: + item['c'] = restore_sub_sup_from_text_regex(item['c']).replace('$br$', PARAGRAPH_SEPARATOR) + else: + item['c'] = "" + return para_text def __extract_paragraphs(self, root: HtmlElement): diff --git a/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/para_has_none.html b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/para_has_none.html new file mode 100644 index 00000000..de42f2bf --- /dev/null +++ b/tests/llm_web_kit/extractor/assets/extractor_chain_input/good_data/html/para_has_none.html @@ -0,0 +1,1245 @@ + + + Zk multiplecomposer using Spring use composer variable in zul - ZK Forum
0

Zk multiplecomposer using Spring use composer variable in zul

+ + + asked + + + + 2018-01-10 05:05:54 +0800 +

javiut gravatar image javiut 委内瑞拉玻利瓦尔共和国国旗
90 1 5

I have a view that uses a composer and contains 5 columnChildren, each of which is the parent component of a macroComponent that also uses a composer.

MyView.zul which has 5 columnChildrens

<window id="win" apply='com.MyComposerController'>
+<portallayout maximizedMode="whole">
+    <portalchildren style="padding:5px" width="50%">
+        <panel sclass="innerPanel" title="${c:l('zul.m.zeus.ac.001')}" border="normal">
+            <panelchildren>
+                <columnlayout>
+                    <columnchildren id='columnChildrenForInfo'/><!-PARENT FOR THE MACROURI TEMPLATE->
+                </columnlayout>
+            </panelchildren>
+        </panel>
+        <panel sclass="innerPanel" title="${c:l('zul.m.zeus.ac.005')}" border="normal">
+            <panelchildren>
+                <columnlayout>
+                    <columnchildren id='columnChildrenForPetition'/><!-PARENT FOR THE MACROURI TEMPLATE->
+                </columnlayout>
+            </panelchildren>
+        </panel>
+        <panel sclass="innerPanel" title="${c:l('zul.m.zeus.ac.009')}" border="normal">
+            <panelchildren>
+                <columnlayout>
+                    <columnchildren id='columnChildrenForErrors'/><!-PARENT FOR THE MACROURI TEMPLATE->
+                </columnlayout>
+            </panelchildren>
+        </panel>
+    </portalchildren>
+    <portalchildren style="padding:5px" width="50%">
+        <panel sclass="innerPanel" title="${c:l('zul.m.zeus.ac.013')}" border="normal">
+            <panelchildren>
+                <columnlayout>
+                    <columnchildren id='columnChildrenForEsperaNoAtendidos'/><!-PARENT FOR THE MACROURI TEMPLATE->
+                </columnlayout>
+            </panelchildren>
+        </panel>
+        <panel sclass="innerPanel" title="${c:l('zul.m.zeus.ac.018')}" border="normal">
+            <panelchildren>
+                <columnlayout>
+                    <columnchildren id='columnChildrenForDocs'/><!-PARENT FOR THE MACROURI TEMPLATE->
+                </columnlayout>
+            </panelchildren>
+        </panel>
+    </portalchildren>
+</portallayout>
+

</window>

Later, I create the following macroUri template in each columnChildren: +MyViewDetail.zul

<zk>
+                            <panel>
+                                <panelchildren>
+<listbox id="results" style="margin-right:5px;margin-top:5px;margin-left:5px">
+
+<listhead>
+   <listheader/>
+   <listheader/>
+   <listheader/>
+</listhead>
+<template name="model">
+<listitem onClick='$composer.listitemOnClick(event);'>
+    <listcell/>
+    <listcell/>
+    <listcell/>
+ </listitem>
+</template>
+</listbox>
+</panelchildren>
+</panel>
+</zk>
+

this macroComponent is mapped in lang-addon.xml like this

<component>
+    <component-name>composerDetailController</component-name>
+    <component-class>DetailController</component-class>
+    <macro-uri>myViewDetail.zul</macro-uri>
+</component>
+

Also, the detail composer is a Spring bean, which I retrieve like this:

<bean id="composerDetailControllerBean" class="...." autowire="byName" scope="prototype"/>
+

I bind it like this.

    final DetailController composerDetailControllerBean = (DetailController)SpringUtil.getBean("composerDetailControllerBean");//EXTRACT IT FROM SPRING CONTAINER
+    composerDetailControllerBean.setParent(columnChildren);//EACH DETAIL CONTROLLER HAS A COLUMN CHILDREN AS A PARENT
+    detail.afterCompose();
+    detail.inicializa();//SOME BUSINESS LOGIC
+

Everything works like a charm, but I have one problem: as you can see in the template, there is a listitem which doesn't have an id, because if I give it an id a NONUNIQUEIDINSPACE exception is thrown. However, I need to listen to the onClick event on it, so I have tried the following:

onClick='$composer.listitemOnClick(event);'
+

But the composer variable points to the first composer, I mean the MyView.zul composer, whereas I have the method and the business logic in the template composer DetailController. However, I don't do this:

<div id="compA" apply="some.package.ComposerA">
+   <div id="compB" apply="some.package.ComposerB"/> i dont use apply in the detailComposer i bind it through lang-addon.xml file
+</div>
+
+<div apply="some.package.ComposerA">
+   <custom-attributes composerName="myCompA"/>i dont use apply in the detailComposer i bind it through lang-addon.xml file i cannot use customAttributes
+</div>
+

I mean, I don't set an explicit name for the detail composer, and I don't know how to refer to it in the zul code.

I cannot map the composer name to a variable. The only thing I tried was to set a composerName using custom-attributes, trying to achieve the custom name through the custom-attributes approach mentioned above:

lang-addon.xml file

<component>
+    <component-name>composerDetailController</component-name>
+    <component-class>DetailController</component-class>
+    <macro-uri>myViewDetail.zul</macro-uri>
+  <custom-attribute>
+     <attribute-name>composerName</attribute-name>
+     <attribute-value>composerDetail</attribute-value>
+  </custom-attribute>
+</component>
+
+'$composerDetail.listitemOnClick(event);'
+

I tried to map the custom attribute-name in lang-addon.xml and use it in the zul, but without success.

I get Caused by: Sourced file: inline evaluation of: $composerDetail.listitemOnClick(event);'' : $composerDetail .listitemOnClick ( event ) $composerDetail.listitemOnClick(event);'' : Attempt to resolve method: listitemOnClick() on undefined variable or class name: $composerDetail : at Line: 14 : in file: inline evaluation of:

I also tried this in MyViewDetail.zul:

<zk>
+    <custom-attributes composerName="composerDetail"/>
+</zk>
+

But that does not work either.

In summary, I don't know how to set a name for the detailComposer, and I cannot use it in MyViewDetail.zul.

delete flag offensive retag edit

+ + + 1 Answer + + +

+ Sort by » + oldest newest most voted
0

+ + + answered + + + + 2018-01-10 16:03:56 +0800 +

cor3000 gravatar image cor3000
4406 2 7

ZK Team

I can't really follow your explanations... and maybe I don't have to. +I think you are trying to solve a problem that shouldn't be there in the first place.

Wiring a listener to dynamically created items such as listitems is best done using event-forwarding to a non dynamic component such as the surrounding listbox.

<listbox id="results" style="margin-right:5px;margin-top:5px;margin-left:5px">
+   ...
+   <template name="model">
+      <!-- don't do this, this is zscript/beanshell -->
+      <!-- <listitem onClick='$composer.listitemOnClick(event);'> -->
+      <!-- use event forwarding -->
+      <listitem forward='onClick=results.onListitemClick'>
+   </template>
+</listbox>
+

Then in your composer you can easily bind an event listener to the results listbox:

@Listen("onListitemClick=#results")
+public void listitemOnClick(ForwardEvent event) {
+    //get the original MouseEvent and click target
+    MouseEvent me = (MouseEvent) event.getOrigin();
+    Listitem listitem = me.getTarget();
+}
+

I think that should avoid the complications you are encountering.

Robert

+ link + publish delete flag offensive edit
+ + + Your answer + + +
Please start posting your answer anonymously - your answer will be saved within the current session and published after you log in or create a new account. Please try to give a substantial answer, for discussions, please use comments and please do remember to vote (after you log in)!

+ [hide preview] +

Question tools

+ Follow +

RSS

Stats

+ Asked: + 2018-01-10 05:05:54 +0800 +

+ Seen: 9 times

+ Last updated: Jan 10 '18

Support Options
  • Email Support
  • Training
  • Consulting
  • Outsourcing
Learn More
\ No newline at end of file diff --git a/tests/llm_web_kit/extractor/html/recognizer/test_text.py b/tests/llm_web_kit/extractor/html/recognizer/test_text.py index 674b9dc3..1d5c200b 100644 --- a/tests/llm_web_kit/extractor/html/recognizer/test_text.py +++ b/tests/llm_web_kit/extractor/html/recognizer/test_text.py @@ -479,6 +479,30 @@ def test_para_br(self): content_md = result.get_content_list().to_mm_md() assert 'The interquartile range formula is the first quartile subtracted from the third quartile:\n\n $IQR = Q_{3}-Q_{1}' in content_md + def test_para_has_none(self): + """ + 兼容段落可能为None的情况 + Returns: + + """ + chain = ExtractSimpleFactory.create(load_pipe_tpl('noclip_html_test')) + self.assertIsNotNone(chain) + test_data = { + 'track_id': 'text_md', + 'dataset_name': 'text_md', + 'url': 'https://br.wikipedia.org/wiki/Faustina_an_Hena%C3%B1', + 'data_source_category': 'HTML', + 'path': 'para_has_none.html', + 'main_path': 'para_has_none.html', + 'file_bytes': 1000, + 'meta_info': {'input_datetime': '2020-01-01 00:00:00'}, + 'language': 'en' + } + input_data = DataJson(test_data) + result = chain.extract(input_data) + content_md = result.get_content_list().to_mm_md() + assert content_md + def test_empty_string_fix(self): """ 测试修复字符串索引越界问题 - 当文本处理中出现空字符串时不应抛出IndexError